diff --git a/.github/workflows/python-check.yaml b/.github/workflows/python-check.yaml index 03878f47..aaa033f6 100644 --- a/.github/workflows/python-check.yaml +++ b/.github/workflows/python-check.yaml @@ -163,5 +163,6 @@ jobs: python3 -m venv .venv source .venv/bin/activate pip install icechunk['test'] --find-links dist --force-reinstall + pip install pytest-mypy-plugins # pass xarray's pyproject.toml so that pytest can find the `flaky` fixture pytest -c=../../xarray/pyproject.toml -W ignore tests/run_xarray_backends_tests.py diff --git a/docs/docs/icechunk-python/version-control.md b/docs/docs/icechunk-python/version-control.md index ca3f1e00..a053d55e 100644 --- a/docs/docs/icechunk-python/version-control.md +++ b/docs/docs/icechunk-python/version-control.md @@ -327,7 +327,7 @@ session2.rebase(icechunk.ConflictDetector()) # RebaseFailedError: Rebase failed on snapshot AE9XS2ZWXT861KD2JGHG: 1 conflicts found ``` -This however fails because both sessions modified the `foo` attribute on the root group. We can use the `ConflictError` to get more information about the conflict. +This, however, fails because both sessions modified the metadata of the root group. We can use the `RebaseFailedError` to get more information about the conflict. ```python try: @@ -335,13 +335,13 @@ try: except icechunk.RebaseFailedError as e: print(e.conflicts) -# [Conflict(UserAttributesDoubleUpdate, path=/)] +# [Conflict(ZarrMetadataDoubleUpdate, path=/)] ``` -This tells us that the conflict is caused by the two sessions modifying the user attributes of the root group (`/`). In this casewe have decided that second session set the `foo` attribute to the correct value, so we can now try to rebase by instructing the `rebase` method to use the second session's changes with the [`BasicConflictSolver`](../reference/#icechunk.BasicConflictSolver). +This tells us that the conflict is caused by the two sessions modifying the metadata of the root group (`/`). In this case we have decided that the second session set the `foo` attribute to the correct value, so we can now try to rebase by instructing the `rebase` method to use the second session's changes with the [`BasicConflictSolver`](../reference/#icechunk.BasicConflictSolver). ```python -session2.rebase(icechunk.BasicConflictSolver(on_user_attributes_conflict=icechunk.VersionSelection.UseOurs)) +session2.rebase(icechunk.BasicConflictSolver()) ``` Success! We can now try and commit the changes again. diff --git a/icechunk-python/python/icechunk/_icechunk_python.pyi b/icechunk-python/python/icechunk/_icechunk_python.pyi index 8dc188f6..0db86fc5 100644 --- a/icechunk-python/python/icechunk/_icechunk_python.pyi +++ b/icechunk-python/python/icechunk/_icechunk_python.pyi @@ -1393,7 +1393,6 @@ class BasicConflictSolver(ConflictSolver): This conflict solver allows for simple configuration of resolution behavior for conflicts that may occur during a rebase operation. It will attempt to resolve a limited set of conflicts based on the configuration options provided.
- - When a user attribute conflict is encountered, the behavior is determined by the `on_user_attributes_conflict` option - When a chunk conflict is encountered, the behavior is determined by the `on_chunk_conflict` option - When an array is deleted that has been updated, `fail_on_delete_of_updated_array` will determine whether to fail the rebase operation - When a group is deleted that has been updated, `fail_on_delete_of_updated_group` will determine whether to fail the rebase operation @@ -1402,7 +1401,6 @@ class BasicConflictSolver(ConflictSolver): def __init__( self, *, - on_user_attributes_conflict: VersionSelection = VersionSelection.UseOurs, on_chunk_conflict: VersionSelection = VersionSelection.UseOurs, fail_on_delete_of_updated_array: bool = False, fail_on_delete_of_updated_group: bool = False, @@ -1411,8 +1409,6 @@ class BasicConflictSolver(ConflictSolver): Parameters ---------- - on_user_attributes_conflict: VersionSelection - The behavior to use when a user attribute conflict is encountered, by default VersionSelection.use_ours() on_chunk_conflict: VersionSelection The behavior to use when a chunk conflict is encountered, by default VersionSelection.use_theirs() fail_on_delete_of_updated_array: bool @@ -1476,39 +1472,36 @@ class ConflictType(Enum): Attributes: NewNodeConflictsWithExistingNode: int A new node conflicts with an existing node - NewNodeInInvalidGroup: int + NewNodeInInvalidGroup: tuple[int] A new node is in an invalid group - ZarrMetadataDoubleUpdate: int + ZarrMetadataDoubleUpdate: tuple[int] A zarr metadata update conflicts with an existing zarr metadata update - ZarrMetadataUpdateOfDeletedArray: int + ZarrMetadataUpdateOfDeletedArray: tuple[int] A zarr metadata update is attempted on a deleted array - UserAttributesDoubleUpdate: int - A user attribute update conflicts with an existing user attribute update - UserAttributesUpdateOfDeletedNode: int - A user attribute update is attempted on a deleted node - ChunkDoubleUpdate: int + ZarrMetadataUpdateOfDeletedGroup: tuple[int] + A zarr metadata update is attempted on a deleted group + ChunkDoubleUpdate: tuple[int] A chunk update conflicts with an existing chunk update - ChunksUpdatedInDeletedArray: int + ChunksUpdatedInDeletedArray: tuple[int] Chunks are updated in a deleted array - ChunksUpdatedInUpdatedArray: int + ChunksUpdatedInUpdatedArray: tuple[int] Chunks are updated in an updated array - DeleteOfUpdatedArray: int + DeleteOfUpdatedArray: tuple[int] A delete is attempted on an updated array - DeleteOfUpdatedGroup: int + DeleteOfUpdatedGroup: tuple[int] A delete is attempted on an updated group """ - NewNodeConflictsWithExistingNode = 1 - NewNodeInInvalidGroup = 2 - ZarrMetadataDoubleUpdate = 3 - ZarrMetadataUpdateOfDeletedArray = 4 - UserAttributesDoubleUpdate = 5 - UserAttributesUpdateOfDeletedNode = 6 - ChunkDoubleUpdate = 7 - ChunksUpdatedInDeletedArray = 8 - ChunksUpdatedInUpdatedArray = 9 - DeleteOfUpdatedArray = 10 - DeleteOfUpdatedGroup = 11 + NewNodeConflictsWithExistingNode = (1,) + NewNodeInInvalidGroup = (2,) + ZarrMetadataDoubleUpdate = (3,) + ZarrMetadataUpdateOfDeletedArray = (4,) + ZarrMetadataUpdateOfDeletedGroup = (5,) + ChunkDoubleUpdate = (6,) + ChunksUpdatedInDeletedArray = (7,) + ChunksUpdatedInUpdatedArray = (8,) + DeleteOfUpdatedArray = (9,) + DeleteOfUpdatedGroup = (10,) class Conflict: """A conflict detected between snapshots""" diff --git a/icechunk-python/src/conflicts.rs b/icechunk-python/src/conflicts.rs index 13d3702e..af169384 100644 --- a/icechunk-python/src/conflicts.rs +++ 
b/icechunk-python/src/conflicts.rs @@ -14,13 +14,12 @@ pub enum PyConflictType { NewNodeInInvalidGroup = 2, ZarrMetadataDoubleUpdate = 3, ZarrMetadataUpdateOfDeletedArray = 4, - UserAttributesDoubleUpdate = 5, - UserAttributesUpdateOfDeletedNode = 6, - ChunkDoubleUpdate = 7, - ChunksUpdatedInDeletedArray = 8, - ChunksUpdatedInUpdatedArray = 9, - DeleteOfUpdatedArray = 10, - DeleteOfUpdatedGroup = 11, + ZarrMetadataUpdateOfDeletedGroup = 5, + ChunkDoubleUpdate = 6, + ChunksUpdatedInDeletedArray = 7, + ChunksUpdatedInUpdatedArray = 8, + DeleteOfUpdatedArray = 9, + DeleteOfUpdatedGroup = 10, } impl Display for PyConflictType { @@ -31,13 +30,12 @@ impl Display for PyConflictType { } PyConflictType::NewNodeInInvalidGroup => "New node in invalid group", PyConflictType::ZarrMetadataDoubleUpdate => "Zarr metadata double update", + PyConflictType::ZarrMetadataUpdateOfDeletedGroup => { + "Zarr metadata update of deleted group" + } PyConflictType::ZarrMetadataUpdateOfDeletedArray => { "Zarr metadata update of deleted array" } - PyConflictType::UserAttributesDoubleUpdate => "User attributes double update", - PyConflictType::UserAttributesUpdateOfDeletedNode => { - "User attributes update of deleted node" - } PyConflictType::ChunkDoubleUpdate => "Chunk double update", PyConflictType::ChunksUpdatedInDeletedArray => { "Chunks updated in deleted array" @@ -108,13 +106,8 @@ impl From<&Conflict> for PyConflict { path: path.to_string(), conflicted_chunks: None, }, - Conflict::UserAttributesDoubleUpdate { path, node_id: _ } => PyConflict { - conflict_type: PyConflictType::UserAttributesDoubleUpdate, - path: path.to_string(), - conflicted_chunks: None, - }, - Conflict::UserAttributesUpdateOfDeletedNode(path) => PyConflict { - conflict_type: PyConflictType::UserAttributesUpdateOfDeletedNode, + Conflict::ZarrMetadataUpdateOfDeletedGroup(path) => PyConflict { + conflict_type: PyConflictType::ZarrMetadataUpdateOfDeletedGroup, path: path.to_string(), conflicted_chunks: None, }, @@ -188,9 +181,8 @@ pub struct PyBasicConflictSolver; #[pymethods] impl PyBasicConflictSolver { #[new] - #[pyo3(signature = (*, on_user_attributes_conflict=PyVersionSelection::UseOurs, on_chunk_conflict=PyVersionSelection::UseOurs, fail_on_delete_of_updated_array = false, fail_on_delete_of_updated_group = false))] + #[pyo3(signature = (*, on_chunk_conflict=PyVersionSelection::UseOurs, fail_on_delete_of_updated_array = false, fail_on_delete_of_updated_group = false))] fn new( - on_user_attributes_conflict: PyVersionSelection, on_chunk_conflict: PyVersionSelection, fail_on_delete_of_updated_array: bool, fail_on_delete_of_updated_group: bool, @@ -198,7 +190,6 @@ impl PyBasicConflictSolver { ( Self, PyConflictSolver(Arc::new(BasicConflictSolver { - on_user_attributes_conflict: on_user_attributes_conflict.into(), on_chunk_conflict: on_chunk_conflict.into(), fail_on_delete_of_updated_array, fail_on_delete_of_updated_group, diff --git a/icechunk-python/src/repository.rs b/icechunk-python/src/repository.rs index e8b83434..ab9da2e3 100644 --- a/icechunk-python/src/repository.rs +++ b/icechunk-python/src/repository.rs @@ -195,9 +195,9 @@ pub struct PyDiff { #[pyo3(get)] pub deleted_arrays: BTreeSet, #[pyo3(get)] - pub updated_user_attributes: BTreeSet, + pub updated_groups: BTreeSet, #[pyo3(get)] - pub updated_zarr_metadata: BTreeSet, + pub updated_arrays: BTreeSet, #[pyo3(get)] // A Vec instead of a set to avoid issues with list not being hashable in python pub updated_chunks: BTreeMap>>, @@ -213,16 +213,10 @@ impl From for PyDiff { 
value.deleted_groups.into_iter().map(|path| path.to_string()).collect(); let deleted_arrays = value.deleted_arrays.into_iter().map(|path| path.to_string()).collect(); - let updated_user_attributes = value - .updated_user_attributes - .into_iter() - .map(|path| path.to_string()) - .collect(); - let updated_zarr_metadata = value - .updated_zarr_metadata - .into_iter() - .map(|path| path.to_string()) - .collect(); + let updated_groups = + value.updated_groups.into_iter().map(|path| path.to_string()).collect(); + let updated_arrays = + value.updated_arrays.into_iter().map(|path| path.to_string()).collect(); let updated_chunks = value .updated_chunks .into_iter() @@ -238,8 +232,8 @@ impl From for PyDiff { new_arrays, deleted_groups, deleted_arrays, - updated_user_attributes, - updated_zarr_metadata, + updated_groups, + updated_arrays, updated_chunks, } } @@ -266,17 +260,17 @@ impl PyDiff { res.push('\n'); } - if !self.updated_zarr_metadata.is_empty() { - res.push_str("Zarr metadata updated:\n"); - for g in self.updated_zarr_metadata.iter() { + if !self.updated_groups.is_empty() { + res.push_str("Group definitions updated:\n"); + for g in self.updated_groups.iter() { writeln!(res, " {}", g).unwrap(); } res.push('\n'); } - if !self.updated_user_attributes.is_empty() { - res.push_str("User attributes updated:\n"); - for g in self.updated_user_attributes.iter() { + if !self.updated_arrays.is_empty() { + res.push_str("Array definitions updated:\n"); + for g in self.updated_arrays.iter() { writeln!(res, " {}", g).unwrap(); } res.push('\n'); diff --git a/icechunk-python/tests/data/test-repo/chunks/20BVFJQXWR84R1F1Q58G b/icechunk-python/tests/data/test-repo/chunks/20BVFJQXWR84R1F1Q58G new file mode 100644 index 00000000..8666a286 Binary files /dev/null and b/icechunk-python/tests/data/test-repo/chunks/20BVFJQXWR84R1F1Q58G differ diff --git a/icechunk-python/tests/data/test-repo/chunks/5PCMGAGQ1FC1GKKHZTK0 b/icechunk-python/tests/data/test-repo/chunks/5PCMGAGQ1FC1GKKHZTK0 new file mode 100644 index 00000000..8666a286 Binary files /dev/null and b/icechunk-python/tests/data/test-repo/chunks/5PCMGAGQ1FC1GKKHZTK0 differ diff --git a/icechunk-python/tests/data/test-repo/chunks/AN2V69NWNGNCW91839Q0 b/icechunk-python/tests/data/test-repo/chunks/AN2V69NWNGNCW91839Q0 new file mode 100644 index 00000000..8666a286 Binary files /dev/null and b/icechunk-python/tests/data/test-repo/chunks/AN2V69NWNGNCW91839Q0 differ diff --git a/icechunk-python/tests/data/test-repo/chunks/HTKY8SN65KBX42E4V9FG b/icechunk-python/tests/data/test-repo/chunks/HTKY8SN65KBX42E4V9FG new file mode 100644 index 00000000..8666a286 Binary files /dev/null and b/icechunk-python/tests/data/test-repo/chunks/HTKY8SN65KBX42E4V9FG differ diff --git a/icechunk-python/tests/data/test-repo/config.yaml b/icechunk-python/tests/data/test-repo/config.yaml index e3d9b500..65e4dd1e 100644 --- a/icechunk-python/tests/data/test-repo/config.yaml +++ b/icechunk-python/tests/data/test-repo/config.yaml @@ -5,14 +5,6 @@ compression: null caching: null storage: null virtual_chunk_containers: - gcs: - name: gcs - url_prefix: gcs - store: !gcs {} - az: - name: az - url_prefix: az - store: !azure {} tigris: name: tigris url_prefix: tigris @@ -21,6 +13,18 @@ virtual_chunk_containers: endpoint_url: https://fly.storage.tigris.dev anonymous: false allow_http: false + file: + name: file + url_prefix: file + store: !local_file_system '' + gcs: + name: gcs + url_prefix: gcs + store: !gcs {} + az: + name: az + url_prefix: az + store: !azure {} s3: name: s3 url_prefix: s3:// @@ 
-29,8 +33,4 @@ virtual_chunk_containers: endpoint_url: http://localhost:9000 anonymous: false allow_http: true - file: - name: file - url_prefix: file - store: !local_file_system '' manifest: null diff --git a/icechunk-python/tests/data/test-repo/manifests/0GQQ44D2837GGMHY81CG b/icechunk-python/tests/data/test-repo/manifests/0GQQ44D2837GGMHY81CG new file mode 100644 index 00000000..5dff37e3 Binary files /dev/null and b/icechunk-python/tests/data/test-repo/manifests/0GQQ44D2837GGMHY81CG differ diff --git a/icechunk-python/tests/data/test-repo/manifests/73Q2CY1JSN768PFJS2M0 b/icechunk-python/tests/data/test-repo/manifests/73Q2CY1JSN768PFJS2M0 new file mode 100644 index 00000000..7b1848c2 Binary files /dev/null and b/icechunk-python/tests/data/test-repo/manifests/73Q2CY1JSN768PFJS2M0 differ diff --git a/icechunk-python/tests/data/test-repo/manifests/8WT6R2E6WVC9GJ7BS6GG b/icechunk-python/tests/data/test-repo/manifests/8WT6R2E6WVC9GJ7BS6GG new file mode 100644 index 00000000..1d3cb4ab Binary files /dev/null and b/icechunk-python/tests/data/test-repo/manifests/8WT6R2E6WVC9GJ7BS6GG differ diff --git a/icechunk-python/tests/data/test-repo/manifests/C38XX4Z2517M93GQ5MA0 b/icechunk-python/tests/data/test-repo/manifests/C38XX4Z2517M93GQ5MA0 new file mode 100644 index 00000000..98ce9238 Binary files /dev/null and b/icechunk-python/tests/data/test-repo/manifests/C38XX4Z2517M93GQ5MA0 differ diff --git a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZW.json b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZW.json index 5b2aafdc..c7dc109c 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZW.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZW.json @@ -1 +1 @@ -{"snapshot":"A2RD2Y65PR6D3B6BR1K0"} \ No newline at end of file +{"snapshot":"HNG82GMS51ECXFXFCYJG"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZX.json b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZX.json index 75370f32..0fc7cd40 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZX.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZX.json @@ -1 +1 @@ -{"snapshot":"K1BMYVG1HNVTNV1FSBH0"} \ No newline at end of file +{"snapshot":"GNFK0SSWD5B8CVA53XEG"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZY.json b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZY.json index d26031d3..cf192bde 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZY.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZY.json @@ -1 +1 @@ -{"snapshot":"RPA0WQCNM2N9HBBRHJQ0"} \ No newline at end of file +{"snapshot":"3EKE17N8YF5ZK5NRMZJ0"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZZ.json b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZZ.json index 66c78fb1..346eeb78 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZZ.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.main/ZZZZZZZZ.json @@ -1 +1 @@ -{"snapshot":"6Q9GDTXKF17BGQVSQZFG"} \ No newline at end of file +{"snapshot":"R7F1RJHPZ428N4AK19K0"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZX.json b/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZX.json index c31e956b..a52be478 100644 --- 
a/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZX.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZX.json @@ -1 +1 @@ -{"snapshot":"949AXZ49X764TMDC6D4G"} \ No newline at end of file +{"snapshot":"TNE0TX645A2G7VTXFA1G"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZY.json b/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZY.json index b3ffccc0..d63e54c4 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZY.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZY.json @@ -1 +1 @@ -{"snapshot":"SNF98D1SK7NWD5KQJM20"} \ No newline at end of file +{"snapshot":"394QWZDXAY74HP6Q8P3G"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZZ.json b/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZZ.json index 5b2aafdc..c7dc109c 100644 --- a/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZZ.json +++ b/icechunk-python/tests/data/test-repo/refs/branch.my-branch/ZZZZZZZZ.json @@ -1 +1 @@ -{"snapshot":"A2RD2Y65PR6D3B6BR1K0"} \ No newline at end of file +{"snapshot":"HNG82GMS51ECXFXFCYJG"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/tag.deleted/ref.json b/icechunk-python/tests/data/test-repo/refs/tag.deleted/ref.json index b3ffccc0..d63e54c4 100644 --- a/icechunk-python/tests/data/test-repo/refs/tag.deleted/ref.json +++ b/icechunk-python/tests/data/test-repo/refs/tag.deleted/ref.json @@ -1 +1 @@ -{"snapshot":"SNF98D1SK7NWD5KQJM20"} \ No newline at end of file +{"snapshot":"394QWZDXAY74HP6Q8P3G"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/tag.it also works!/ref.json b/icechunk-python/tests/data/test-repo/refs/tag.it also works!/ref.json index c31e956b..a52be478 100644 --- a/icechunk-python/tests/data/test-repo/refs/tag.it also works!/ref.json +++ b/icechunk-python/tests/data/test-repo/refs/tag.it also works!/ref.json @@ -1 +1 @@ -{"snapshot":"949AXZ49X764TMDC6D4G"} \ No newline at end of file +{"snapshot":"TNE0TX645A2G7VTXFA1G"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/refs/tag.it works!/ref.json b/icechunk-python/tests/data/test-repo/refs/tag.it works!/ref.json index b3ffccc0..d63e54c4 100644 --- a/icechunk-python/tests/data/test-repo/refs/tag.it works!/ref.json +++ b/icechunk-python/tests/data/test-repo/refs/tag.it works!/ref.json @@ -1 +1 @@ -{"snapshot":"SNF98D1SK7NWD5KQJM20"} \ No newline at end of file +{"snapshot":"394QWZDXAY74HP6Q8P3G"} \ No newline at end of file diff --git a/icechunk-python/tests/data/test-repo/snapshots/394QWZDXAY74HP6Q8P3G b/icechunk-python/tests/data/test-repo/snapshots/394QWZDXAY74HP6Q8P3G new file mode 100644 index 00000000..3b09dece Binary files /dev/null and b/icechunk-python/tests/data/test-repo/snapshots/394QWZDXAY74HP6Q8P3G differ diff --git a/icechunk-python/tests/data/test-repo/snapshots/3EKE17N8YF5ZK5NRMZJ0 b/icechunk-python/tests/data/test-repo/snapshots/3EKE17N8YF5ZK5NRMZJ0 new file mode 100644 index 00000000..7171f403 Binary files /dev/null and b/icechunk-python/tests/data/test-repo/snapshots/3EKE17N8YF5ZK5NRMZJ0 differ diff --git a/icechunk-python/tests/data/test-repo/snapshots/GNFK0SSWD5B8CVA53XEG b/icechunk-python/tests/data/test-repo/snapshots/GNFK0SSWD5B8CVA53XEG new file mode 100644 index 00000000..93decc5f Binary files /dev/null and 
b/icechunk-python/tests/data/test-repo/snapshots/GNFK0SSWD5B8CVA53XEG differ diff --git a/icechunk-python/tests/data/test-repo/snapshots/HNG82GMS51ECXFXFCYJG b/icechunk-python/tests/data/test-repo/snapshots/HNG82GMS51ECXFXFCYJG new file mode 100644 index 00000000..bd82739d Binary files /dev/null and b/icechunk-python/tests/data/test-repo/snapshots/HNG82GMS51ECXFXFCYJG differ diff --git a/icechunk-python/tests/data/test-repo/snapshots/R7F1RJHPZ428N4AK19K0 b/icechunk-python/tests/data/test-repo/snapshots/R7F1RJHPZ428N4AK19K0 new file mode 100644 index 00000000..493bb362 Binary files /dev/null and b/icechunk-python/tests/data/test-repo/snapshots/R7F1RJHPZ428N4AK19K0 differ diff --git a/icechunk-python/tests/data/test-repo/snapshots/TNE0TX645A2G7VTXFA1G b/icechunk-python/tests/data/test-repo/snapshots/TNE0TX645A2G7VTXFA1G new file mode 100644 index 00000000..77790ad2 Binary files /dev/null and b/icechunk-python/tests/data/test-repo/snapshots/TNE0TX645A2G7VTXFA1G differ diff --git a/icechunk-python/tests/data/test-repo/transactions/394QWZDXAY74HP6Q8P3G b/icechunk-python/tests/data/test-repo/transactions/394QWZDXAY74HP6Q8P3G new file mode 100644 index 00000000..0b4e36ae Binary files /dev/null and b/icechunk-python/tests/data/test-repo/transactions/394QWZDXAY74HP6Q8P3G differ diff --git a/icechunk-python/tests/data/test-repo/transactions/3EKE17N8YF5ZK5NRMZJ0 b/icechunk-python/tests/data/test-repo/transactions/3EKE17N8YF5ZK5NRMZJ0 new file mode 100644 index 00000000..4716947c Binary files /dev/null and b/icechunk-python/tests/data/test-repo/transactions/3EKE17N8YF5ZK5NRMZJ0 differ diff --git a/icechunk-python/tests/data/test-repo/transactions/GNFK0SSWD5B8CVA53XEG b/icechunk-python/tests/data/test-repo/transactions/GNFK0SSWD5B8CVA53XEG new file mode 100644 index 00000000..29b3ccfc Binary files /dev/null and b/icechunk-python/tests/data/test-repo/transactions/GNFK0SSWD5B8CVA53XEG differ diff --git a/icechunk-python/tests/data/test-repo/transactions/HNG82GMS51ECXFXFCYJG b/icechunk-python/tests/data/test-repo/transactions/HNG82GMS51ECXFXFCYJG new file mode 100644 index 00000000..632c49e7 Binary files /dev/null and b/icechunk-python/tests/data/test-repo/transactions/HNG82GMS51ECXFXFCYJG differ diff --git a/icechunk-python/tests/data/test-repo/transactions/TNE0TX645A2G7VTXFA1G b/icechunk-python/tests/data/test-repo/transactions/TNE0TX645A2G7VTXFA1G new file mode 100644 index 00000000..cd830e5b Binary files /dev/null and b/icechunk-python/tests/data/test-repo/transactions/TNE0TX645A2G7VTXFA1G differ diff --git a/icechunk-python/tests/test_can_read_old.py b/icechunk-python/tests/test_can_read_old.py index 39f39be5..2e89c2d0 100644 --- a/icechunk-python/tests/test_can_read_old.py +++ b/icechunk-python/tests/test_can_read_old.py @@ -271,8 +271,8 @@ async def test_icechunk_can_read_old_repo() -> None: ) assert diff.deleted_groups == set() assert diff.deleted_arrays == set() - assert diff.updated_user_attributes == set() - assert diff.updated_zarr_metadata == set() + assert diff.updated_groups == set() + assert diff.updated_arrays == set() if __name__ == "__main__": diff --git a/icechunk-python/tests/test_conflicts.py b/icechunk-python/tests/test_conflicts.py index 1f7c4ded..2615c4ea 100644 --- a/icechunk-python/tests/test_conflicts.py +++ b/icechunk-python/tests/test_conflicts.py @@ -48,18 +48,22 @@ def test_detect_conflicts(repo: icechunk.Repository) -> None: try: session_b.rebase(icechunk.ConflictDetector()) except icechunk.RebaseFailedError as e: - assert len(e.conflicts) == 2 + assert len(e.conflicts) == 
3 + assert e.conflicts[0].path == "/foo/bar/some-array" assert e.conflicts[0].path == "/foo/bar/some-array" assert ( e.conflicts[0].conflict_type - == icechunk.ConflictType.UserAttributesDoubleUpdate + == icechunk.ConflictType.ZarrMetadataDoubleUpdate ) - assert e.conflicts[0].conflicted_chunks is None - assert e.conflicts[1].path == "/foo/bar/some-array" - assert e.conflicts[1].conflict_type == icechunk.ConflictType.ChunkDoubleUpdate - assert e.conflicts[1].conflicted_chunks - assert len(e.conflicts[1].conflicted_chunks) == 100 + assert ( + e.conflicts[1].conflict_type + == icechunk.ConflictType.ChunksUpdatedInUpdatedArray + ) + assert e.conflicts[2].path == "/foo/bar/some-array" + assert e.conflicts[2].conflict_type == icechunk.ConflictType.ChunkDoubleUpdate + assert e.conflicts[2].conflicted_chunks + assert len(e.conflicts[2].conflicted_chunks) == 100 raise e @@ -94,7 +98,7 @@ def test_rebase_no_conflicts(repo: icechunk.Repository) -> None: "on_chunk_conflict", [icechunk.VersionSelection.UseOurs, icechunk.VersionSelection.UseTheirs], ) -def test_rebase_user_attrs_with_ours( +def test_rebase_fails_on_user_atts_double_edit( repo: icechunk.Repository, on_chunk_conflict: icechunk.VersionSelection ) -> None: session_a = repo.writable_session("main") @@ -116,24 +120,7 @@ def test_rebase_user_attrs_with_ours( # Make sure it fails if the resolver is not set with pytest.raises(icechunk.RebaseFailedError): - session_b.rebase( - icechunk.BasicConflictSolver( - on_user_attributes_conflict=icechunk.VersionSelection.Fail - ) - ) - - solver = icechunk.BasicConflictSolver( - on_user_attributes_conflict=icechunk.VersionSelection.UseOurs, - ) - - session_b.rebase(solver) - session_b.commit("after conflict") - - assert ( - array_b.attrs["repo"] == 2 - if on_chunk_conflict == icechunk.VersionSelection.UseOurs - else 1 - ) + session_b.rebase(icechunk.BasicConflictSolver()) @pytest.mark.parametrize( diff --git a/icechunk-python/tests/test_timetravel.py b/icechunk-python/tests/test_timetravel.py index feab90d0..09a08cf2 100644 --- a/icechunk-python/tests/test_timetravel.py +++ b/icechunk-python/tests/test_timetravel.py @@ -42,8 +42,8 @@ def test_timetravel() -> None: ) assert status.deleted_groups == set() assert status.deleted_arrays == set() - assert status.updated_user_attributes == {"/", "/air_temp"} # why? - assert status.updated_zarr_metadata == set() + assert status.updated_arrays == set() + assert status.updated_groups == set() first_snapshot_id = session.commit("commit 1") assert session.read_only @@ -137,8 +137,8 @@ def test_timetravel() -> None: ) assert diff.deleted_groups == set() assert diff.deleted_arrays == set() - assert diff.updated_user_attributes == {"/", "/air_temp"} # why? 
- assert diff.updated_zarr_metadata == set() + assert diff.updated_arrays == set() + assert diff.updated_groups == set() assert ( repr(diff) == """\ @@ -148,10 +148,6 @@ def test_timetravel() -> None: Arrays created: /air_temp -User attributes updated: - / - /air_temp - Chunks updated: /air_temp: [0, 0] @@ -168,6 +164,25 @@ def test_timetravel() -> None: """ ) + session = repo.writable_session("main") + store = session.store + + group = zarr.open_group(store=store) + air_temp = group.create_array( + "air_temp", shape=(1000, 1000), chunks=(100, 100), dtype="i4", overwrite=True + ) + assert ( + repr(session.status()) + == """\ +Arrays created: + /air_temp + +Arrays deleted: + /air_temp + +""" + ) + with pytest.raises(ValueError, match="doesn't include"): # if we call diff in the wrong order it fails with a message repo.diff(from_tag="v1.0", to_snapshot=parents[-1].id) diff --git a/icechunk-python/tests/test_zarr/test_properties.py b/icechunk-python/tests/test_zarr/test_properties.py index 86444aaa..58656e99 100644 --- a/icechunk-python/tests/test_zarr/test_properties.py +++ b/icechunk-python/tests/test_zarr/test_properties.py @@ -1,6 +1,5 @@ from typing import Any -import numpy as np import pytest from numpy.testing import assert_array_equal @@ -40,8 +39,6 @@ def create() -> IcechunkStore: def test_roundtrip(data: st.DataObject, nparray: Any) -> None: # TODO: support size-0 arrays GH392 assume(nparray.size > 0) - # TODO: fix complex fill values GH391 - assume(not np.iscomplexobj(nparray)) zarray = data.draw( arrays( diff --git a/icechunk-python/tests/test_zarr/test_stateful.py b/icechunk-python/tests/test_zarr/test_stateful.py index 031fa6ae..a3477848 100644 --- a/icechunk-python/tests/test_zarr/test_stateful.py +++ b/icechunk-python/tests/test_zarr/test_stateful.py @@ -106,8 +106,6 @@ def add_array( array, _ = array_and_chunks # TODO: support size-0 arrays GH392 assume(array.size > 0) - # TODO: fix complex fill values GH391 - assume(not np.iscomplexobj(array)) super().add_array(data, name, array_and_chunks) ##### TODO: port everything below to zarr diff --git a/icechunk/examples/low_level_dataset.rs b/icechunk/examples/low_level_dataset.rs index 8088f82e..17b6ccaa 100644 --- a/icechunk/examples/low_level_dataset.rs +++ b/icechunk/examples/low_level_dataset.rs @@ -1,12 +1,9 @@ #![allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)] -use std::{collections::HashMap, iter, num::NonZeroU64, sync::Arc}; +use std::{collections::HashMap, sync::Arc}; +use bytes::Bytes; use icechunk::{ - format::{manifest::ChunkPayload, snapshot::ZarrArrayMetadata, ChunkIndices, Path}, - metadata::{ - ChunkKeyEncoding, ChunkShape, Codec, DataType, FillValue, StorageTransformer, - UserAttributes, - }, + format::{manifest::ChunkPayload, snapshot::ArrayShape, ChunkIndices, Path}, repository::VersionInfo, session::{Session, SessionError}, storage::new_in_memory_storage, @@ -46,9 +43,10 @@ ds.add_group("/group2".into()).await?; "#, ); - ds.add_group(Path::root()).await?; - ds.add_group("/group1".try_into().unwrap()).await?; - ds.add_group("/group2".try_into().unwrap()).await?; + let user_data = Bytes::new(); + ds.add_group(Path::root(), user_data.clone()).await?; + ds.add_group("/group1".try_into().unwrap(), user_data.clone()).await?; + ds.add_group("/group2".try_into().unwrap(), user_data.clone()).await?; println!(); print_nodes(&ds).await?; @@ -96,58 +94,13 @@ ds.add_array(array1_path.clone(), zarr_meta1).await?; "#, ); - let zarr_meta1 = ZarrArrayMetadata { - shape: vec![3], - data_type: DataType::Int32, - 
chunk_shape: ChunkShape(vec![ - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![Codec { - name: "mycodec".to_string(), - configuration: Some(HashMap::from_iter(iter::once(( - "foo".to_string(), - serde_json::Value::from(42), - )))), - }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: Some(HashMap::from_iter(iter::once(( - "foo".to_string(), - serde_json::Value::from(42), - )))), - }]), - dimension_names: Some(vec![ - Some("x".to_string()), - Some("y".to_string()), - Some("t".to_string()), - ]), - }; + let shape = ArrayShape::new(vec![(3, 1)]).unwrap(); + let dimension_names = Some(vec!["x".into()]); let array1_path: Path = "/group1/array1".try_into().unwrap(); - ds.add_array(array1_path.clone(), zarr_meta1).await?; + ds.add_array(array1_path.clone(), shape, dimension_names, user_data.clone()).await?; println!(); print_nodes(&ds).await?; - println!(); - println!(); - println!("## Setting array user attributes"); - println!( - r#" -``` -ds.set_user_attributes(array1_path.clone(), Some("{{n:42}}".to_string())).await?; -``` - "#, - ); - ds.set_user_attributes( - array1_path.clone(), - Some(UserAttributes::try_new(br#"{"n":42}"#).unwrap()), - ) - .await?; - print_nodes(&ds).await?; - println!("## Committing"); let v1_id = ds.commit("some message", Default::default()).await?; println!( @@ -288,14 +241,7 @@ async fn print_nodes(ds: &Session) -> Result<(), SessionError> { .await? .map(|n| n.unwrap()) .sorted_by_key(|n| n.path.clone()) - .map(|node| { - format!( - "|{:10?}|{:15}|{:10?}\n", - node.node_type(), - node.path.to_string(), - node.user_attributes, - ) - }) + .map(|node| format!("|{:10?}|{:15}\n", node.node_type(), node.path.to_string(),)) .format(""); println!("{}", rows); diff --git a/icechunk/examples/multithreaded_get_chunk_refs.rs b/icechunk/examples/multithreaded_get_chunk_refs.rs index b0a94d85..1c6769d7 100644 --- a/icechunk/examples/multithreaded_get_chunk_refs.rs +++ b/icechunk/examples/multithreaded_get_chunk_refs.rs @@ -13,22 +13,19 @@ use std::{ collections::HashMap, env::{self}, - num::NonZeroU64, sync::Arc, time::Instant, }; +use bytes::Bytes; use futures::{stream::FuturesUnordered, StreamExt}; use icechunk::{ config::CompressionConfig, format::{ manifest::{ChunkPayload, ChunkRef}, - snapshot::ZarrArrayMetadata, + snapshot::ArrayShape, ChunkId, ChunkIndices, Path, }, - metadata::{ - ChunkKeyEncoding, ChunkShape, Codec, DataType, FillValue, StorageTransformer, - }, new_local_filesystem_storage, repository::VersionInfo, session::Session, @@ -62,30 +59,12 @@ async fn mk_repo( async fn do_writes(path: &std::path::Path) -> Result<(), Box> { let repo = mk_repo(path).await?; let mut session = repo.writable_session("main").await?; - let meta = ZarrArrayMetadata { - shape: vec![MAX_I, MAX_J, MAX_L, MAX_K], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![ - Some("x".to_string()), - Some("y".to_string()), - Some("t".to_string()), - ]), 
- }; + let shape = + ArrayShape::new(vec![(MAX_I, 1), (MAX_J, 1), (MAX_K, 1), (MAX_L, 1)]).unwrap(); + let user_data = Bytes::new(); + let dimension_names = Some(vec!["i".into(), "j".into(), "k".into(), "l".into()]); let path: Path = "/array".try_into().unwrap(); - session.add_array(path.clone(), meta).await?; + session.add_array(path.clone(), shape, dimension_names, user_data).await?; session.commit("array created", None).await?; let session = Arc::new(RwLock::new(repo.writable_session("main").await?)); diff --git a/icechunk/flatbuffers/snapshot.fbs b/icechunk/flatbuffers/snapshot.fbs index 7f36da33..157762a6 100644 --- a/icechunk/flatbuffers/snapshot.fbs +++ b/icechunk/flatbuffers/snapshot.fbs @@ -24,33 +24,6 @@ struct ManifestFileInfo { num_chunk_refs: uint32; } -// a pointer to a user attributes file -struct AttributeFileInfo { - // id of the object in the repo's object store - id: ObjectId12; -} - -// a pointer to a user attributes file -table UserAttributesRef { - // id of the object in the repo's object store - object_id: ObjectId12 (required); - - // index where the user attributes for the array start - location: uint32; -} - -// user attributes written inline -table InlineUserAttributes { - // user attributes data, serialized as rmp_serde of the json value - // TODO: better serialization format - data :[uint8] (required); -} - -union UserAttributesSnapshot { - Inline :InlineUserAttributes, - Reference :UserAttributesRef, -} - // A range of chunk indexes struct ChunkIndexRange { // inclusive @@ -69,15 +42,25 @@ table ManifestRef { extents: [ChunkIndexRange] (required); } +// the shape of the array along a given dimension +struct DimensionShape { + array_length: uint64; + chunk_length: uint64; +} + +table DimensionName { + // optional + name: string; +} + // a marker for a group node table GroupNodeData {} // data for an array node table ArrayNodeData { - // the zarr metadata for the array - // serialized as rmp_serde of the json value - // TODO: better serialization format - zarr_metadata: [uint8] (required); + shape: [DimensionShape] (required); + + dimension_names: [DimensionName]; // pointers to all the manifests where this array has chunk references manifests: [ManifestRef] (required); @@ -97,8 +80,9 @@ table NodeSnapshot { // absolute path of the node within the repo path: string (required); - // pointer to the user attributes for the node - user_attributes: UserAttributesSnapshot; + // the metadata for the node according to what the user passed + // this will generally be the full zarr metadata for the node + user_data: [uint8] (required); // node's data node_data: NodeData (required); @@ -128,10 +112,6 @@ table Snapshot { // the list of all manifest files this snapshot points to // sorted in ascending order of ManifestFileInfo.id manifest_files: [ManifestFileInfo] (required); - - // the list of all attribute files this snapshot points to - // sorted in ascending order of AttributeFileInfo.id - attribute_files: [AttributeFileInfo] (required); } root_type Snapshot; diff --git a/icechunk/flatbuffers/transaction_log.fbs b/icechunk/flatbuffers/transaction_log.fbs index 3afbe564..3f996119 100644 --- a/icechunk/flatbuffers/transaction_log.fbs +++ b/icechunk/flatbuffers/transaction_log.fbs @@ -36,13 +36,13 @@ table TransactionLog { // sorted in ascending order deleted_arrays: [ObjectId8] (required); - // node ids of the nodes that had user attributes modified in this transaction + // node ids of the groups that had user definitions modified in this transaction // sorted in ascending 
order - updated_user_attributes: [ObjectId8] (required); + updated_arrays: [ObjectId8] (required); - // node ids of the nodes that had zarr metadata modified in this transaction + // node ids of the arrays that had user definitions modified in this transaction // sorted in ascending order - updated_zarr_metadata: [ObjectId8] (required); + updated_groups: [ObjectId8] (required); // chunk ref changes made in this transaction // sorted in ascending order of the node_id of the ArrayUpdatedChunks diff --git a/icechunk/src/change_set.rs b/icechunk/src/change_set.rs index 1bca04ed..130f465d 100644 --- a/icechunk/src/change_set.rs +++ b/icechunk/src/change_set.rs @@ -4,27 +4,32 @@ use std::{ mem::take, }; +use bytes::Bytes; use itertools::{Either, Itertools as _}; use serde::{Deserialize, Serialize}; use crate::{ format::{ manifest::{ChunkInfo, ChunkPayload}, - snapshot::{NodeData, NodeSnapshot, UserAttributesSnapshot, ZarrArrayMetadata}, + snapshot::{ArrayShape, DimensionName, NodeData, NodeSnapshot}, ChunkIndices, NodeId, Path, }, - metadata::UserAttributes, session::SessionResult, }; -#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)] +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct ArrayData { + pub shape: ArrayShape, + pub dimension_names: Option>, + pub user_data: Bytes, +} + +#[derive(Clone, Debug, PartialEq, Eq, Default, Serialize, Deserialize)] pub struct ChangeSet { - new_groups: HashMap, - new_arrays: HashMap, - updated_arrays: HashMap, - // These paths may point to Arrays or Groups, - // since both Groups and Arrays support UserAttributes - updated_attributes: HashMap>, + new_groups: HashMap, + new_arrays: HashMap, + updated_arrays: HashMap, + updated_groups: HashMap, // It's important we keep these sorted, we use this fact in TransactionLog creation set_chunks: BTreeMap>>, deleted_groups: HashSet<(Path, NodeId)>, @@ -32,10 +37,6 @@ pub struct ChangeSet { } impl ChangeSet { - pub fn zarr_updated_arrays(&self) -> impl Iterator { - self.updated_arrays.keys() - } - pub fn deleted_arrays(&self) -> impl Iterator { self.deleted_arrays.iter() } @@ -44,8 +45,12 @@ impl ChangeSet { self.deleted_groups.iter() } - pub fn user_attributes_updated_nodes(&self) -> impl Iterator { - self.updated_attributes.keys() + pub fn updated_arrays(&self) -> impl Iterator { + self.updated_arrays.keys() + } + + pub fn updated_groups(&self) -> impl Iterator { + self.updated_groups.keys() } pub fn array_is_deleted(&self, path_and_id: &(Path, NodeId)) -> bool { @@ -71,39 +76,55 @@ impl ChangeSet { self == &ChangeSet::default() } - pub fn add_group(&mut self, path: Path, node_id: NodeId) { - self.new_groups.insert(path, node_id); + pub fn add_group(&mut self, path: Path, node_id: NodeId, definition: Bytes) { + debug_assert!(!self.updated_groups.contains_key(&node_id)); + self.new_groups.insert(path, (node_id, definition)); } - pub fn get_group(&self, path: &Path) -> Option<&NodeId> { + pub fn get_group(&self, path: &Path) -> Option<&(NodeId, Bytes)> { self.new_groups.get(path) } - pub fn get_array(&self, path: &Path) -> Option<&(NodeId, ZarrArrayMetadata)> { + pub fn get_array(&self, path: &Path) -> Option<&(NodeId, ArrayData)> { self.new_arrays.get(path) } /// IMPORTANT: This method does not delete children. 
The caller /// is responsible for doing that pub fn delete_group(&mut self, path: Path, node_id: &NodeId) { - self.updated_attributes.remove(node_id); + self.updated_groups.remove(node_id); if self.new_groups.remove(&path).is_none() { // it's an old group, we need to flag it as deleted self.deleted_groups.insert((path, node_id.clone())); } } - pub fn add_array( - &mut self, - path: Path, - node_id: NodeId, - metadata: ZarrArrayMetadata, - ) { - self.new_arrays.insert(path, (node_id, metadata)); + pub fn add_array(&mut self, path: Path, node_id: NodeId, array_data: ArrayData) { + self.new_arrays.insert(path, (node_id, array_data)); + } + + pub fn update_array(&mut self, node_id: &NodeId, path: &Path, array_data: ArrayData) { + match self.new_arrays.get(path) { + Some((id, _)) => { + debug_assert!(!self.updated_arrays.contains_key(id)); + self.new_arrays.insert(path.clone(), (node_id.clone(), array_data)); + } + None => { + self.updated_arrays.insert(node_id.clone(), array_data); + } + } } - pub fn update_array(&mut self, node_id: NodeId, metadata: ZarrArrayMetadata) { - self.updated_arrays.insert(node_id, metadata); + pub fn update_group(&mut self, node_id: &NodeId, path: &Path, definition: Bytes) { + match self.new_groups.get(path) { + Some((id, _)) => { + debug_assert!(!self.updated_groups.contains_key(id)); + self.new_groups.insert(path.clone(), (node_id.clone(), definition)); + } + None => { + self.updated_groups.insert(node_id.clone(), definition); + } + } } pub fn delete_array(&mut self, path: Path, node_id: &NodeId) { @@ -116,7 +137,6 @@ impl ChangeSet { ); self.updated_arrays.remove(node_id); - self.updated_attributes.remove(node_id); self.set_chunks.remove(node_id); if !is_new_array { self.deleted_arrays.insert((path, node_id.clone())); @@ -128,30 +148,16 @@ impl ChangeSet { self.deleted_groups.contains(&key) || self.deleted_arrays.contains(&key) } - pub fn has_updated_attributes(&self, node_id: &NodeId) -> bool { - self.updated_attributes.contains_key(node_id) - } + //pub fn has_updated_definition(&self, node_id: &NodeId) -> bool { + // self.updated_definitions.contains_key(node_id) + //} - pub fn get_updated_zarr_metadata( - &self, - node_id: &NodeId, - ) -> Option<&ZarrArrayMetadata> { + pub fn get_updated_array(&self, node_id: &NodeId) -> Option<&ArrayData> { self.updated_arrays.get(node_id) } - pub fn update_user_attributes( - &mut self, - node_id: NodeId, - atts: Option, - ) { - self.updated_attributes.insert(node_id, atts); - } - - pub fn get_user_attributes( - &self, - node_id: &NodeId, - ) -> Option<&Option> { - self.updated_attributes.get(node_id) + pub fn get_updated_group(&self, node_id: &NodeId) -> Option<&Bytes> { + self.updated_groups.get(node_id) } pub fn set_chunk_ref( @@ -233,7 +239,7 @@ impl ChangeSet { } pub fn new_groups(&self) -> impl Iterator { - self.new_groups.iter() + self.new_groups.iter().map(|(path, (node_id, _))| (path, node_id)) } pub fn new_arrays(&self) -> impl Iterator { @@ -263,8 +269,8 @@ impl ChangeSet { // TODO: optimize self.new_groups.extend(other.new_groups); self.new_arrays.extend(other.new_arrays); + self.updated_groups.extend(other.updated_groups); self.updated_arrays.extend(other.updated_arrays); - self.updated_attributes.extend(other.updated_attributes); self.deleted_groups.extend(other.deleted_groups); self.deleted_arrays.extend(other.deleted_arrays); @@ -320,27 +326,30 @@ impl ChangeSet { } pub fn get_new_array(&self, path: &Path) -> Option { - self.get_array(path).map(|(id, meta)| { - let meta = 
self.get_updated_zarr_metadata(id).unwrap_or(meta).clone(); - let atts = self.get_user_attributes(id).cloned(); + self.get_array(path).map(|(id, array_data)| { + debug_assert!(!self.updated_arrays.contains_key(id)); NodeSnapshot { id: id.clone(), path: path.clone(), - user_attributes: atts.flatten().map(UserAttributesSnapshot::Inline), + user_data: array_data.user_data.clone(), // We put no manifests in new arrays, see get_chunk_ref to understand how chunks get // fetched for those arrays - node_data: NodeData::Array(meta.clone(), vec![]), + node_data: NodeData::Array { + shape: array_data.shape.clone(), + dimension_names: array_data.dimension_names.clone(), + manifests: vec![], + }, } }) } pub fn get_new_group(&self, path: &Path) -> Option { - self.get_group(path).map(|id| { - let atts = self.get_user_attributes(id).cloned(); + self.get_group(path).map(|(id, definition)| { + debug_assert!(!self.updated_groups.contains_key(id)); NodeSnapshot { id: id.clone(), path: path.clone(), - user_attributes: atts.flatten().map(UserAttributesSnapshot::Inline), + user_data: definition.clone(), node_data: NodeData::Group, } }) @@ -365,51 +374,51 @@ impl ChangeSet { return None; } - let session_atts = self - .get_user_attributes(&node.id) - .cloned() - .map(|a| a.map(UserAttributesSnapshot::Inline)); - let new_atts = session_atts.unwrap_or(node.user_attributes); match node.node_data { - NodeData::Group => Some(NodeSnapshot { user_attributes: new_atts, ..node }), - NodeData::Array(old_zarr_meta, manifests) => { - let new_zarr_meta = self - .get_updated_zarr_metadata(&node.id) - .cloned() - .unwrap_or(old_zarr_meta); - + NodeData::Group => { + let new_definition = + self.updated_groups.get(&node.id).cloned().unwrap_or(node.user_data); + Some(NodeSnapshot { user_data: new_definition, ..node }) + } + NodeData::Array { shape, dimension_names, manifests } => { + let new_data = + self.updated_arrays.get(&node.id).cloned().unwrap_or_else(|| { + ArrayData { shape, dimension_names, user_data: node.user_data } + }); Some(NodeSnapshot { - node_data: NodeData::Array(new_zarr_meta, manifests), - user_attributes: new_atts, + user_data: new_data.user_data, + node_data: NodeData::Array { + shape: new_data.shape, + dimension_names: new_data.dimension_names, + manifests, + }, ..node }) } } } - pub fn undo_user_attributes_update(&mut self, node_id: &NodeId) { - self.updated_attributes.remove(node_id); + pub fn undo_update(&mut self, node_id: &NodeId) { + self.updated_arrays.remove(node_id); + self.updated_groups.remove(node_id); } } #[cfg(test)] #[allow(clippy::unwrap_used)] mod tests { - use std::num::NonZeroU64; - + use bytes::Bytes; use itertools::Itertools; use super::ChangeSet; use crate::{ + change_set::ArrayData, format::{ manifest::{ChunkInfo, ChunkPayload}, - snapshot::ZarrArrayMetadata, + snapshot::ArrayShape, ChunkIndices, NodeId, }, - metadata::{ - ChunkKeyEncoding, ChunkShape, Codec, DataType, FillValue, StorageTransformer, - }, }; #[test] @@ -417,36 +426,26 @@ mod tests { let mut change_set = ChangeSet::default(); assert_eq!(None, change_set.new_arrays_chunk_iterator().next()); - let zarr_meta = ZarrArrayMetadata { - shape: vec![2, 2, 2], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - 
name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![ - Some("x".to_string()), - Some("y".to_string()), - Some("t".to_string()), - ]), - }; + let shape = ArrayShape::new(vec![(2, 1), (2, 1), (2, 1)]).unwrap(); + let dimension_names = Some(vec!["x".into(), "y".into(), "t".into()]); let node_id1 = NodeId::random(); let node_id2 = NodeId::random(); + let array_data = ArrayData { + shape: shape.clone(), + dimension_names: dimension_names.clone(), + user_data: Bytes::from_static(b"foobar"), + }; change_set.add_array( "/foo/bar".try_into().unwrap(), node_id1.clone(), - zarr_meta.clone(), + array_data.clone(), + ); + change_set.add_array( + "/foo/baz".try_into().unwrap(), + node_id2.clone(), + array_data.clone(), ); - change_set.add_array("/foo/baz".try_into().unwrap(), node_id2.clone(), zarr_meta); assert_eq!(None, change_set.new_arrays_chunk_iterator().next()); change_set.set_chunk_ref(node_id1.clone(), ChunkIndices(vec![0, 1]), None); diff --git a/icechunk/src/conflicts/basic_solver.rs b/icechunk/src/conflicts/basic_solver.rs index 642c6b7f..c154da6e 100644 --- a/icechunk/src/conflicts/basic_solver.rs +++ b/icechunk/src/conflicts/basic_solver.rs @@ -17,7 +17,6 @@ pub enum VersionSelection { #[derive(Debug, Clone)] pub struct BasicConflictSolver { - pub on_user_attributes_conflict: VersionSelection, pub on_chunk_conflict: VersionSelection, pub fail_on_delete_of_updated_array: bool, pub fail_on_delete_of_updated_group: bool, @@ -26,7 +25,6 @@ pub struct BasicConflictSolver { impl Default for BasicConflictSolver { fn default() -> Self { Self { - on_user_attributes_conflict: VersionSelection::UseOurs, on_chunk_conflict: VersionSelection::UseOurs, fail_on_delete_of_updated_array: false, fail_on_delete_of_updated_group: false, @@ -72,32 +70,24 @@ impl BasicConflictSolver { conflicts: Vec, ) -> SessionResult { use Conflict::*; - let unsolvable = conflicts.iter().any( - |conflict| { - matches!( - conflict, - NewNodeConflictsWithExistingNode(_) | - NewNodeInInvalidGroup(_) | - ZarrMetadataDoubleUpdate(_) | - ZarrMetadataUpdateOfDeletedArray(_) | - UserAttributesUpdateOfDeletedNode(_) | - ChunksUpdatedInDeletedArray{..} | - ChunksUpdatedInUpdatedArray{..} - ) || - matches!(conflict, - UserAttributesDoubleUpdate{..} if self.on_user_attributes_conflict == VersionSelection::Fail - ) || - matches!(conflict, - ChunkDoubleUpdate{..} if self.on_chunk_conflict == VersionSelection::Fail - ) || - matches!(conflict, - DeleteOfUpdatedArray{..} if self.fail_on_delete_of_updated_array - ) || - matches!(conflict, - DeleteOfUpdatedGroup{..} if self.fail_on_delete_of_updated_group - ) - }, - ); + let unsolvable = conflicts.iter().any(|conflict| { + matches!( + conflict, + NewNodeConflictsWithExistingNode(_) + | NewNodeInInvalidGroup(_) + | ZarrMetadataDoubleUpdate(_) + | ZarrMetadataUpdateOfDeletedArray(_) + | ZarrMetadataUpdateOfDeletedGroup(_) + | ChunksUpdatedInDeletedArray { .. } + | ChunksUpdatedInUpdatedArray { .. } + ) || matches!(conflict, + ChunkDoubleUpdate{..} if self.on_chunk_conflict == VersionSelection::Fail + ) || matches!(conflict, + DeleteOfUpdatedArray{..} if self.fail_on_delete_of_updated_array + ) || matches!(conflict, + DeleteOfUpdatedGroup{..} if self.fail_on_delete_of_updated_group + ) + }); if unsolvable { return Ok(ConflictResolution::Unsolvable { @@ -123,20 +113,6 @@ impl BasicConflictSolver { VersionSelection::Fail => panic!("Bug in conflict resolution: ChunkDoubleUpdate flagged as unrecoverable") } } - UserAttributesDoubleUpdate { node_id, .. 
} => { - match self.on_user_attributes_conflict { - VersionSelection::UseOurs => { - // this is a no-op, our change will override the conflicting change - } - VersionSelection::UseTheirs => { - current_changes.undo_user_attributes_update(&node_id); - } - // we can panic here because we have returned from the function if there - // were any unsolvable conflicts - #[allow(clippy::panic)] - VersionSelection::Fail => panic!("Bug in conflict resolution: UserAttributesDoubleUpdate flagged as unrecoverable") - } - } DeleteOfUpdatedArray { .. } => { assert!(!self.fail_on_delete_of_updated_array); // this is a no-op, the solution is to still delete the array diff --git a/icechunk/src/conflicts/detector.rs b/icechunk/src/conflicts/detector.rs index 2569b560..5bdd8dbe 100644 --- a/icechunk/src/conflicts/detector.rs +++ b/icechunk/src/conflicts/detector.rs @@ -66,8 +66,8 @@ impl ConflictSolver for ConflictDetector { let path_finder = PathFinder::new(current_repo.list_nodes().await?); let updated_arrays_already_updated = current_changes - .zarr_updated_arrays() - .filter(|node_id| previous_change.zarr_metadata_updated(node_id)) + .updated_arrays() + .filter(|node_id| previous_change.array_updated(node_id)) .map(Ok); let updated_arrays_already_updated = stream::iter(updated_arrays_already_updated) @@ -76,43 +76,37 @@ impl ConflictSolver for ConflictDetector { Ok(Conflict::ZarrMetadataDoubleUpdate(path)) }); - let updated_arrays_were_deleted = current_changes - .zarr_updated_arrays() - .filter(|node_id| previous_change.array_deleted(node_id)) + let updated_groups_already_updated = current_changes + .updated_groups() + .filter(|node_id| previous_change.group_updated(node_id)) .map(Ok); - let updated_arrays_were_deleted = stream::iter(updated_arrays_were_deleted) + let updated_groups_already_updated = stream::iter(updated_groups_already_updated) .and_then(|node_id| async { let path = path_finder.find(node_id)?; - Ok(Conflict::ZarrMetadataUpdateOfDeletedArray(path)) + Ok(Conflict::ZarrMetadataDoubleUpdate(path)) }); - let updated_attributes_already_updated = current_changes - .user_attributes_updated_nodes() - .filter(|node_id| previous_change.user_attributes_updated(node_id)) + let updated_arrays_were_deleted = current_changes + .updated_arrays() + .filter(|node_id| previous_change.array_deleted(node_id)) .map(Ok); - let updated_attributes_already_updated = - stream::iter(updated_attributes_already_updated).and_then(|node_id| async { + let updated_arrays_were_deleted = stream::iter(updated_arrays_were_deleted) + .and_then(|node_id| async { let path = path_finder.find(node_id)?; - Ok(Conflict::UserAttributesDoubleUpdate { - path, - node_id: node_id.clone(), - }) + Ok(Conflict::ZarrMetadataUpdateOfDeletedArray(path)) }); - let updated_attributes_on_deleted_node = current_changes - .user_attributes_updated_nodes() - .filter(|node_id| { - previous_change.array_deleted(node_id) - || previous_change.group_deleted(node_id) - }) + let updated_groups_were_deleted = current_changes + .updated_groups() + .filter(|node_id| previous_change.group_deleted(node_id)) .map(Ok); - let updated_attributes_on_deleted_node = - stream::iter(updated_attributes_on_deleted_node).and_then(|node_id| async { + let updated_groups_were_deleted = stream::iter(updated_groups_were_deleted) + .and_then(|node_id| async { let path = path_finder.find(node_id)?; - Ok(Conflict::UserAttributesUpdateOfDeletedNode(path)) + Ok(Conflict::ZarrMetadataUpdateOfDeletedGroup(path)) }); let chunks_updated_in_deleted_array = current_changes @@ -131,7 +125,7 
@@ impl ConflictSolver for ConflictDetector { let chunks_updated_in_updated_array = current_changes .arrays_with_chunk_changes() - .filter(|node_id| previous_change.zarr_metadata_updated(node_id)) + .filter(|node_id| previous_change.array_updated(node_id)) .map(Ok); let chunks_updated_in_updated_array = @@ -187,8 +181,7 @@ impl ConflictSolver for ConflictDetector { }; if let Some(node_id) = id { - if previous_change.zarr_metadata_updated(&node_id) - || previous_change.user_attributes_updated(&node_id) + if previous_change.array_updated(&node_id) || previous_change.chunks_updated(&node_id) { Ok(Some(Conflict::DeleteOfUpdatedArray { @@ -216,7 +209,7 @@ impl ConflictSolver for ConflictDetector { }; if let Some(node_id) = id { - if previous_change.user_attributes_updated(&node_id) { + if previous_change.group_updated(&node_id) { Ok(Some(Conflict::DeleteOfUpdatedGroup { path: path.clone(), node_id: node_id.clone(), @@ -232,9 +225,9 @@ impl ConflictSolver for ConflictDetector { let all_conflicts: Vec<_> = new_nodes_explicit_conflicts .chain(new_nodes_implicit_conflicts) .chain(updated_arrays_already_updated) + .chain(updated_groups_already_updated) .chain(updated_arrays_were_deleted) - .chain(updated_attributes_already_updated) - .chain(updated_attributes_on_deleted_node) + .chain(updated_groups_were_deleted) .chain(chunks_updated_in_deleted_array) .chain(chunks_updated_in_updated_array) .chain(chunks_double_updated) diff --git a/icechunk/src/conflicts/mod.rs b/icechunk/src/conflicts/mod.rs index 6bb84e5a..bc6b69d7 100644 --- a/icechunk/src/conflicts/mod.rs +++ b/icechunk/src/conflicts/mod.rs @@ -17,11 +17,7 @@ pub enum Conflict { NewNodeInInvalidGroup(Path), ZarrMetadataDoubleUpdate(Path), ZarrMetadataUpdateOfDeletedArray(Path), - UserAttributesDoubleUpdate { - path: Path, - node_id: NodeId, - }, - UserAttributesUpdateOfDeletedNode(Path), + ZarrMetadataUpdateOfDeletedGroup(Path), ChunkDoubleUpdate { path: Path, node_id: NodeId, diff --git a/icechunk/src/format/flatbuffers/all_generated.rs b/icechunk/src/format/flatbuffers/all_generated.rs index 6b67d66e..a0fe584e 100644 --- a/icechunk/src/format/flatbuffers/all_generated.rs +++ b/icechunk/src/format/flatbuffers/all_generated.rs @@ -17,104 +17,6 @@ pub mod gen { extern crate flatbuffers; use self::flatbuffers::{EndianScalar, Follow}; - #[deprecated( - since = "2.0.0", - note = "Use associated constants instead. This will no longer be generated in 2021." - )] - pub const ENUM_MIN_USER_ATTRIBUTES_SNAPSHOT: u8 = 0; - #[deprecated( - since = "2.0.0", - note = "Use associated constants instead. This will no longer be generated in 2021." - )] - pub const ENUM_MAX_USER_ATTRIBUTES_SNAPSHOT: u8 = 2; - #[deprecated( - since = "2.0.0", - note = "Use associated constants instead. This will no longer be generated in 2021." - )] - #[allow(non_camel_case_types)] - pub const ENUM_VALUES_USER_ATTRIBUTES_SNAPSHOT: [UserAttributesSnapshot; 3] = [ - UserAttributesSnapshot::NONE, - UserAttributesSnapshot::Inline, - UserAttributesSnapshot::Reference, - ]; - - #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] - #[repr(transparent)] - pub struct UserAttributesSnapshot(pub u8); - #[allow(non_upper_case_globals)] - impl UserAttributesSnapshot { - pub const NONE: Self = Self(0); - pub const Inline: Self = Self(1); - pub const Reference: Self = Self(2); - - pub const ENUM_MIN: u8 = 0; - pub const ENUM_MAX: u8 = 2; - pub const ENUM_VALUES: &'static [Self] = - &[Self::NONE, Self::Inline, Self::Reference]; - /// Returns the variant's name or "" if unknown. 
- pub fn variant_name(self) -> Option<&'static str> { - match self { - Self::NONE => Some("NONE"), - Self::Inline => Some("Inline"), - Self::Reference => Some("Reference"), - _ => None, - } - } - } - impl core::fmt::Debug for UserAttributesSnapshot { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - if let Some(name) = self.variant_name() { - f.write_str(name) - } else { - f.write_fmt(format_args!("", self.0)) - } - } - } - impl<'a> flatbuffers::Follow<'a> for UserAttributesSnapshot { - type Inner = Self; - #[inline] - unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { - let b = flatbuffers::read_scalar_at::(buf, loc); - Self(b) - } - } - - impl flatbuffers::Push for UserAttributesSnapshot { - type Output = UserAttributesSnapshot; - #[inline] - unsafe fn push(&self, dst: &mut [u8], _written_len: usize) { - flatbuffers::emplace_scalar::(dst, self.0); - } - } - - impl flatbuffers::EndianScalar for UserAttributesSnapshot { - type Scalar = u8; - #[inline] - fn to_little_endian(self) -> u8 { - self.0.to_le() - } - #[inline] - #[allow(clippy::wrong_self_convention)] - fn from_little_endian(v: u8) -> Self { - let b = u8::from_le(v); - Self(b) - } - } - - impl<'a> flatbuffers::Verifiable for UserAttributesSnapshot { - #[inline] - fn run_verifier( - v: &mut flatbuffers::Verifier, - pos: usize, - ) -> Result<(), flatbuffers::InvalidFlatbuffer> { - use self::flatbuffers::Verifiable; - u8::run_verifier(v, pos) - } - } - - impl flatbuffers::SimpleToVerifyInSlice for UserAttributesSnapshot {} - pub struct UserAttributesSnapshotUnionTableOffset {} - #[deprecated( since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021." @@ -513,53 +415,56 @@ pub mod gen { } } - // struct AttributeFileInfo, aligned to 1 + // struct ChunkIndexRange, aligned to 4 #[repr(transparent)] #[derive(Clone, Copy, PartialEq)] - pub struct AttributeFileInfo(pub [u8; 12]); - impl Default for AttributeFileInfo { + pub struct ChunkIndexRange(pub [u8; 8]); + impl Default for ChunkIndexRange { fn default() -> Self { - Self([0; 12]) + Self([0; 8]) } } - impl core::fmt::Debug for AttributeFileInfo { + impl core::fmt::Debug for ChunkIndexRange { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - f.debug_struct("AttributeFileInfo").field("id", &self.id()).finish() + f.debug_struct("ChunkIndexRange") + .field("from", &self.from()) + .field("to", &self.to()) + .finish() } } - impl flatbuffers::SimpleToVerifyInSlice for AttributeFileInfo {} - impl<'a> flatbuffers::Follow<'a> for AttributeFileInfo { - type Inner = &'a AttributeFileInfo; + impl flatbuffers::SimpleToVerifyInSlice for ChunkIndexRange {} + impl<'a> flatbuffers::Follow<'a> for ChunkIndexRange { + type Inner = &'a ChunkIndexRange; #[inline] unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { - <&'a AttributeFileInfo>::follow(buf, loc) + <&'a ChunkIndexRange>::follow(buf, loc) } } - impl<'a> flatbuffers::Follow<'a> for &'a AttributeFileInfo { - type Inner = &'a AttributeFileInfo; + impl<'a> flatbuffers::Follow<'a> for &'a ChunkIndexRange { + type Inner = &'a ChunkIndexRange; #[inline] unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { - flatbuffers::follow_cast_ref::(buf, loc) + flatbuffers::follow_cast_ref::(buf, loc) } } - impl<'b> flatbuffers::Push for AttributeFileInfo { - type Output = AttributeFileInfo; + impl<'b> flatbuffers::Push for ChunkIndexRange { + type Output = ChunkIndexRange; #[inline] unsafe fn push(&self, dst: &mut [u8], _written_len: usize) { let src = 
::core::slice::from_raw_parts( - self as *const AttributeFileInfo as *const u8, + self as *const ChunkIndexRange as *const u8, ::size(), ); dst.copy_from_slice(src); } #[inline] fn alignment() -> flatbuffers::PushAlignment { - flatbuffers::PushAlignment::new(1) + flatbuffers::PushAlignment::new(4) } } - impl<'a> flatbuffers::Verifiable for AttributeFileInfo { + impl<'a> flatbuffers::Verifiable for ChunkIndexRange { #[inline] fn run_verifier( v: &mut flatbuffers::Verifier, @@ -570,77 +475,126 @@ pub mod gen { } } - impl<'a> AttributeFileInfo { + impl<'a> ChunkIndexRange { #[allow(clippy::too_many_arguments)] - pub fn new(id: &ObjectId12) -> Self { - let mut s = Self([0; 12]); - s.set_id(id); + pub fn new(from: u32, to: u32) -> Self { + let mut s = Self([0; 8]); + s.set_from(from); + s.set_to(to); s } - pub fn id(&self) -> &ObjectId12 { + pub fn from(&self) -> u32 { + let mut mem = + core::mem::MaybeUninit::<::Scalar>::uninit(); // Safety: // Created from a valid Table for this object - // Which contains a valid struct in this slot - unsafe { &*(self.0[0..].as_ptr() as *const ObjectId12) } + // Which contains a valid value in this slot + EndianScalar::from_little_endian(unsafe { + core::ptr::copy_nonoverlapping( + self.0[0..].as_ptr(), + mem.as_mut_ptr() as *mut u8, + core::mem::size_of::<::Scalar>(), + ); + mem.assume_init() + }) } - #[allow(clippy::identity_op)] - pub fn set_id(&mut self, x: &ObjectId12) { - self.0[0..0 + 12].copy_from_slice(&x.0) + pub fn set_from(&mut self, x: u32) { + let x_le = x.to_little_endian(); + // Safety: + // Created from a valid Table for this object + // Which contains a valid value in this slot + unsafe { + core::ptr::copy_nonoverlapping( + &x_le as *const _ as *const u8, + self.0[0..].as_mut_ptr(), + core::mem::size_of::<::Scalar>(), + ); + } + } + + pub fn to(&self) -> u32 { + let mut mem = + core::mem::MaybeUninit::<::Scalar>::uninit(); + // Safety: + // Created from a valid Table for this object + // Which contains a valid value in this slot + EndianScalar::from_little_endian(unsafe { + core::ptr::copy_nonoverlapping( + self.0[4..].as_ptr(), + mem.as_mut_ptr() as *mut u8, + core::mem::size_of::<::Scalar>(), + ); + mem.assume_init() + }) + } + + pub fn set_to(&mut self, x: u32) { + let x_le = x.to_little_endian(); + // Safety: + // Created from a valid Table for this object + // Which contains a valid value in this slot + unsafe { + core::ptr::copy_nonoverlapping( + &x_le as *const _ as *const u8, + self.0[4..].as_mut_ptr(), + core::mem::size_of::<::Scalar>(), + ); + } } } - // struct ChunkIndexRange, aligned to 4 + // struct DimensionShape, aligned to 8 #[repr(transparent)] #[derive(Clone, Copy, PartialEq)] - pub struct ChunkIndexRange(pub [u8; 8]); - impl Default for ChunkIndexRange { + pub struct DimensionShape(pub [u8; 16]); + impl Default for DimensionShape { fn default() -> Self { - Self([0; 8]) + Self([0; 16]) } } - impl core::fmt::Debug for ChunkIndexRange { + impl core::fmt::Debug for DimensionShape { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - f.debug_struct("ChunkIndexRange") - .field("from", &self.from()) - .field("to", &self.to()) + f.debug_struct("DimensionShape") + .field("array_length", &self.array_length()) + .field("chunk_length", &self.chunk_length()) .finish() } } - impl flatbuffers::SimpleToVerifyInSlice for ChunkIndexRange {} - impl<'a> flatbuffers::Follow<'a> for ChunkIndexRange { - type Inner = &'a ChunkIndexRange; + impl flatbuffers::SimpleToVerifyInSlice for DimensionShape {} + impl<'a> 
flatbuffers::Follow<'a> for DimensionShape { + type Inner = &'a DimensionShape; #[inline] unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { - <&'a ChunkIndexRange>::follow(buf, loc) + <&'a DimensionShape>::follow(buf, loc) } } - impl<'a> flatbuffers::Follow<'a> for &'a ChunkIndexRange { - type Inner = &'a ChunkIndexRange; + impl<'a> flatbuffers::Follow<'a> for &'a DimensionShape { + type Inner = &'a DimensionShape; #[inline] unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { - flatbuffers::follow_cast_ref::(buf, loc) + flatbuffers::follow_cast_ref::(buf, loc) } } - impl<'b> flatbuffers::Push for ChunkIndexRange { - type Output = ChunkIndexRange; + impl<'b> flatbuffers::Push for DimensionShape { + type Output = DimensionShape; #[inline] unsafe fn push(&self, dst: &mut [u8], _written_len: usize) { let src = ::core::slice::from_raw_parts( - self as *const ChunkIndexRange as *const u8, + self as *const DimensionShape as *const u8, ::size(), ); dst.copy_from_slice(src); } #[inline] fn alignment() -> flatbuffers::PushAlignment { - flatbuffers::PushAlignment::new(4) + flatbuffers::PushAlignment::new(8) } } - impl<'a> flatbuffers::Verifiable for ChunkIndexRange { + impl<'a> flatbuffers::Verifiable for DimensionShape { #[inline] fn run_verifier( v: &mut flatbuffers::Verifier, @@ -651,18 +605,18 @@ pub mod gen { } } - impl<'a> ChunkIndexRange { + impl<'a> DimensionShape { #[allow(clippy::too_many_arguments)] - pub fn new(from: u32, to: u32) -> Self { - let mut s = Self([0; 8]); - s.set_from(from); - s.set_to(to); + pub fn new(array_length: u64, chunk_length: u64) -> Self { + let mut s = Self([0; 16]); + s.set_array_length(array_length); + s.set_chunk_length(chunk_length); s } - pub fn from(&self) -> u32 { + pub fn array_length(&self) -> u64 { let mut mem = - core::mem::MaybeUninit::<::Scalar>::uninit(); + core::mem::MaybeUninit::<::Scalar>::uninit(); // Safety: // Created from a valid Table for this object // Which contains a valid value in this slot @@ -670,13 +624,13 @@ pub mod gen { core::ptr::copy_nonoverlapping( self.0[0..].as_ptr(), mem.as_mut_ptr() as *mut u8, - core::mem::size_of::<::Scalar>(), + core::mem::size_of::<::Scalar>(), ); mem.assume_init() }) } - pub fn set_from(&mut self, x: u32) { + pub fn set_array_length(&mut self, x: u64) { let x_le = x.to_little_endian(); // Safety: // Created from a valid Table for this object @@ -685,28 +639,28 @@ pub mod gen { core::ptr::copy_nonoverlapping( &x_le as *const _ as *const u8, self.0[0..].as_mut_ptr(), - core::mem::size_of::<::Scalar>(), + core::mem::size_of::<::Scalar>(), ); } } - pub fn to(&self) -> u32 { + pub fn chunk_length(&self) -> u64 { let mut mem = - core::mem::MaybeUninit::<::Scalar>::uninit(); + core::mem::MaybeUninit::<::Scalar>::uninit(); // Safety: // Created from a valid Table for this object // Which contains a valid value in this slot EndianScalar::from_little_endian(unsafe { core::ptr::copy_nonoverlapping( - self.0[4..].as_ptr(), + self.0[8..].as_ptr(), mem.as_mut_ptr() as *mut u8, - core::mem::size_of::<::Scalar>(), + core::mem::size_of::<::Scalar>(), ); mem.assume_init() }) } - pub fn set_to(&mut self, x: u32) { + pub fn set_chunk_length(&mut self, x: u64) { let x_le = x.to_little_endian(); // Safety: // Created from a valid Table for this object @@ -714,8 +668,8 @@ pub mod gen { unsafe { core::ptr::copy_nonoverlapping( &x_le as *const _ as *const u8, - self.0[4..].as_mut_ptr(), - core::mem::size_of::<::Scalar>(), + self.0[8..].as_mut_ptr(), + core::mem::size_of::<::Scalar>(), ); } } @@ -1300,312 
+1254,28 @@ pub mod gen { ds.finish() } } - pub enum MetadataItemOffset {} - #[derive(Copy, Clone, PartialEq)] - - pub struct MetadataItem<'a> { - pub _tab: flatbuffers::Table<'a>, - } - - impl<'a> flatbuffers::Follow<'a> for MetadataItem<'a> { - type Inner = MetadataItem<'a>; - #[inline] - unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { - Self { _tab: flatbuffers::Table::new(buf, loc) } - } - } - - impl<'a> MetadataItem<'a> { - pub const VT_NAME: flatbuffers::VOffsetT = 4; - pub const VT_VALUE: flatbuffers::VOffsetT = 6; - - #[inline] - pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { - MetadataItem { _tab: table } - } - #[allow(unused_mut)] - pub fn create< - 'bldr: 'args, - 'args: 'mut_bldr, - 'mut_bldr, - A: flatbuffers::Allocator + 'bldr, - >( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, - args: &'args MetadataItemArgs<'args>, - ) -> flatbuffers::WIPOffset> { - let mut builder = MetadataItemBuilder::new(_fbb); - if let Some(x) = args.value { - builder.add_value(x); - } - if let Some(x) = args.name { - builder.add_name(x); - } - builder.finish() - } - - #[inline] - pub fn name(&self) -> &'a str { - // Safety: - // Created from valid Table for this object - // which contains a valid value in this slot - unsafe { - self._tab - .get::>( - MetadataItem::VT_NAME, - None, - ) - .unwrap() - } - } - #[inline] - pub fn value(&self) -> flatbuffers::Vector<'a, u8> { - // Safety: - // Created from valid Table for this object - // which contains a valid value in this slot - unsafe { - self._tab - .get::>>( - MetadataItem::VT_VALUE, - None, - ) - .unwrap() - } - } - } - - impl flatbuffers::Verifiable for MetadataItem<'_> { - #[inline] - fn run_verifier( - v: &mut flatbuffers::Verifier, - pos: usize, - ) -> Result<(), flatbuffers::InvalidFlatbuffer> { - use self::flatbuffers::Verifiable; - v.visit_table(pos)? - .visit_field::>( - "name", - Self::VT_NAME, - true, - )? - .visit_field::>>( - "value", - Self::VT_VALUE, - true, - )? 
- .finish(); - Ok(()) - } - } - pub struct MetadataItemArgs<'a> { - pub name: Option>, - pub value: Option>>, - } - impl<'a> Default for MetadataItemArgs<'a> { - #[inline] - fn default() -> Self { - MetadataItemArgs { - name: None, // required field - value: None, // required field - } - } - } - - pub struct MetadataItemBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, - start_: flatbuffers::WIPOffset, - } - impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> MetadataItemBuilder<'a, 'b, A> { - #[inline] - pub fn add_name(&mut self, name: flatbuffers::WIPOffset<&'b str>) { - self.fbb_.push_slot_always::>( - MetadataItem::VT_NAME, - name, - ); - } - #[inline] - pub fn add_value( - &mut self, - value: flatbuffers::WIPOffset>, - ) { - self.fbb_.push_slot_always::>( - MetadataItem::VT_VALUE, - value, - ); - } - #[inline] - pub fn new( - _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, - ) -> MetadataItemBuilder<'a, 'b, A> { - let start = _fbb.start_table(); - MetadataItemBuilder { fbb_: _fbb, start_: start } - } - #[inline] - pub fn finish(self) -> flatbuffers::WIPOffset> { - let o = self.fbb_.end_table(self.start_); - self.fbb_.required(o, MetadataItem::VT_NAME, "name"); - self.fbb_.required(o, MetadataItem::VT_VALUE, "value"); - flatbuffers::WIPOffset::new(o.value()) - } - } - - impl core::fmt::Debug for MetadataItem<'_> { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - let mut ds = f.debug_struct("MetadataItem"); - ds.field("name", &self.name()); - ds.field("value", &self.value()); - ds.finish() - } - } - pub enum UserAttributesRefOffset {} - #[derive(Copy, Clone, PartialEq)] - - pub struct UserAttributesRef<'a> { - pub _tab: flatbuffers::Table<'a>, - } - - impl<'a> flatbuffers::Follow<'a> for UserAttributesRef<'a> { - type Inner = UserAttributesRef<'a>; - #[inline] - unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { - Self { _tab: flatbuffers::Table::new(buf, loc) } - } - } - - impl<'a> UserAttributesRef<'a> { - pub const VT_OBJECT_ID: flatbuffers::VOffsetT = 4; - pub const VT_LOCATION: flatbuffers::VOffsetT = 6; - - #[inline] - pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { - UserAttributesRef { _tab: table } - } - #[allow(unused_mut)] - pub fn create< - 'bldr: 'args, - 'args: 'mut_bldr, - 'mut_bldr, - A: flatbuffers::Allocator + 'bldr, - >( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, - args: &'args UserAttributesRefArgs<'args>, - ) -> flatbuffers::WIPOffset> { - let mut builder = UserAttributesRefBuilder::new(_fbb); - builder.add_location(args.location); - if let Some(x) = args.object_id { - builder.add_object_id(x); - } - builder.finish() - } - - #[inline] - pub fn object_id(&self) -> &'a ObjectId12 { - // Safety: - // Created from valid Table for this object - // which contains a valid value in this slot - unsafe { - self._tab - .get::(UserAttributesRef::VT_OBJECT_ID, None) - .unwrap() - } - } - #[inline] - pub fn location(&self) -> u32 { - // Safety: - // Created from valid Table for this object - // which contains a valid value in this slot - unsafe { - self._tab.get::(UserAttributesRef::VT_LOCATION, Some(0)).unwrap() - } - } - } - - impl flatbuffers::Verifiable for UserAttributesRef<'_> { - #[inline] - fn run_verifier( - v: &mut flatbuffers::Verifier, - pos: usize, - ) -> Result<(), flatbuffers::InvalidFlatbuffer> { - use self::flatbuffers::Verifiable; - v.visit_table(pos)? - .visit_field::("object_id", Self::VT_OBJECT_ID, true)? 
- .visit_field::("location", Self::VT_LOCATION, false)? - .finish(); - Ok(()) - } - } - pub struct UserAttributesRefArgs<'a> { - pub object_id: Option<&'a ObjectId12>, - pub location: u32, - } - impl<'a> Default for UserAttributesRefArgs<'a> { - #[inline] - fn default() -> Self { - UserAttributesRefArgs { - object_id: None, // required field - location: 0, - } - } - } - - pub struct UserAttributesRefBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, - start_: flatbuffers::WIPOffset, - } - impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> UserAttributesRefBuilder<'a, 'b, A> { - #[inline] - pub fn add_object_id(&mut self, object_id: &ObjectId12) { - self.fbb_.push_slot_always::<&ObjectId12>( - UserAttributesRef::VT_OBJECT_ID, - object_id, - ); - } - #[inline] - pub fn add_location(&mut self, location: u32) { - self.fbb_.push_slot::(UserAttributesRef::VT_LOCATION, location, 0); - } - #[inline] - pub fn new( - _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, - ) -> UserAttributesRefBuilder<'a, 'b, A> { - let start = _fbb.start_table(); - UserAttributesRefBuilder { fbb_: _fbb, start_: start } - } - #[inline] - pub fn finish(self) -> flatbuffers::WIPOffset> { - let o = self.fbb_.end_table(self.start_); - self.fbb_.required(o, UserAttributesRef::VT_OBJECT_ID, "object_id"); - flatbuffers::WIPOffset::new(o.value()) - } - } - - impl core::fmt::Debug for UserAttributesRef<'_> { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - let mut ds = f.debug_struct("UserAttributesRef"); - ds.field("object_id", &self.object_id()); - ds.field("location", &self.location()); - ds.finish() - } - } - pub enum InlineUserAttributesOffset {} + pub enum MetadataItemOffset {} #[derive(Copy, Clone, PartialEq)] - pub struct InlineUserAttributes<'a> { + pub struct MetadataItem<'a> { pub _tab: flatbuffers::Table<'a>, } - impl<'a> flatbuffers::Follow<'a> for InlineUserAttributes<'a> { - type Inner = InlineUserAttributes<'a>; + impl<'a> flatbuffers::Follow<'a> for MetadataItem<'a> { + type Inner = MetadataItem<'a>; #[inline] unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { Self { _tab: flatbuffers::Table::new(buf, loc) } } } - impl<'a> InlineUserAttributes<'a> { - pub const VT_DATA: flatbuffers::VOffsetT = 4; + impl<'a> MetadataItem<'a> { + pub const VT_NAME: flatbuffers::VOffsetT = 4; + pub const VT_VALUE: flatbuffers::VOffsetT = 6; #[inline] pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { - InlineUserAttributes { _tab: table } + MetadataItem { _tab: table } } #[allow(unused_mut)] pub fn create< @@ -1615,24 +1285,41 @@ pub mod gen { A: flatbuffers::Allocator + 'bldr, >( _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, - args: &'args InlineUserAttributesArgs<'args>, - ) -> flatbuffers::WIPOffset> { - let mut builder = InlineUserAttributesBuilder::new(_fbb); - if let Some(x) = args.data { - builder.add_data(x); + args: &'args MetadataItemArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = MetadataItemBuilder::new(_fbb); + if let Some(x) = args.value { + builder.add_value(x); + } + if let Some(x) = args.name { + builder.add_name(x); } builder.finish() } #[inline] - pub fn data(&self) -> flatbuffers::Vector<'a, u8> { + pub fn name(&self) -> &'a str { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab + .get::>( + MetadataItem::VT_NAME, + None, + ) + .unwrap() + } + } + #[inline] + pub fn value(&self) -> 
flatbuffers::Vector<'a, u8> { // Safety: // Created from valid Table for this object // which contains a valid value in this slot unsafe { self._tab .get::>>( - InlineUserAttributes::VT_DATA, + MetadataItem::VT_VALUE, None, ) .unwrap() @@ -1640,7 +1327,7 @@ pub mod gen { } } - impl flatbuffers::Verifiable for InlineUserAttributes<'_> { + impl flatbuffers::Verifiable for MetadataItem<'_> { #[inline] fn run_verifier( v: &mut flatbuffers::Verifier, @@ -1648,61 +1335,77 @@ pub mod gen { ) -> Result<(), flatbuffers::InvalidFlatbuffer> { use self::flatbuffers::Verifiable; v.visit_table(pos)? + .visit_field::>( + "name", + Self::VT_NAME, + true, + )? .visit_field::>>( - "data", - Self::VT_DATA, + "value", + Self::VT_VALUE, true, )? .finish(); Ok(()) } } - pub struct InlineUserAttributesArgs<'a> { - pub data: Option>>, + pub struct MetadataItemArgs<'a> { + pub name: Option>, + pub value: Option>>, } - impl<'a> Default for InlineUserAttributesArgs<'a> { + impl<'a> Default for MetadataItemArgs<'a> { #[inline] fn default() -> Self { - InlineUserAttributesArgs { - data: None, // required field - } + MetadataItemArgs { + name: None, // required field + value: None, // required field + } } } - pub struct InlineUserAttributesBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + pub struct MetadataItemBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, start_: flatbuffers::WIPOffset, } - impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> InlineUserAttributesBuilder<'a, 'b, A> { + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> MetadataItemBuilder<'a, 'b, A> { + #[inline] + pub fn add_name(&mut self, name: flatbuffers::WIPOffset<&'b str>) { + self.fbb_.push_slot_always::>( + MetadataItem::VT_NAME, + name, + ); + } #[inline] - pub fn add_data( + pub fn add_value( &mut self, - data: flatbuffers::WIPOffset>, + value: flatbuffers::WIPOffset>, ) { self.fbb_.push_slot_always::>( - InlineUserAttributes::VT_DATA, - data, + MetadataItem::VT_VALUE, + value, ); } #[inline] pub fn new( _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, - ) -> InlineUserAttributesBuilder<'a, 'b, A> { + ) -> MetadataItemBuilder<'a, 'b, A> { let start = _fbb.start_table(); - InlineUserAttributesBuilder { fbb_: _fbb, start_: start } + MetadataItemBuilder { fbb_: _fbb, start_: start } } #[inline] - pub fn finish(self) -> flatbuffers::WIPOffset> { + pub fn finish(self) -> flatbuffers::WIPOffset> { let o = self.fbb_.end_table(self.start_); - self.fbb_.required(o, InlineUserAttributes::VT_DATA, "data"); + self.fbb_.required(o, MetadataItem::VT_NAME, "name"); + self.fbb_.required(o, MetadataItem::VT_VALUE, "value"); flatbuffers::WIPOffset::new(o.value()) } } - impl core::fmt::Debug for InlineUserAttributes<'_> { + impl core::fmt::Debug for MetadataItem<'_> { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - let mut ds = f.debug_struct("InlineUserAttributes"); - ds.field("data", &self.data()); + let mut ds = f.debug_struct("MetadataItem"); + ds.field("name", &self.name()); + ds.field("value", &self.value()); ds.finish() } } @@ -1842,6 +1545,119 @@ pub mod gen { ds.finish() } } + pub enum DimensionNameOffset {} + #[derive(Copy, Clone, PartialEq)] + + pub struct DimensionName<'a> { + pub _tab: flatbuffers::Table<'a>, + } + + impl<'a> flatbuffers::Follow<'a> for DimensionName<'a> { + type Inner = DimensionName<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: flatbuffers::Table::new(buf, loc) } + } + } + + impl<'a> 
DimensionName<'a> { + pub const VT_NAME: flatbuffers::VOffsetT = 4; + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + DimensionName { _tab: table } + } + #[allow(unused_mut)] + pub fn create< + 'bldr: 'args, + 'args: 'mut_bldr, + 'mut_bldr, + A: flatbuffers::Allocator + 'bldr, + >( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + args: &'args DimensionNameArgs<'args>, + ) -> flatbuffers::WIPOffset> { + let mut builder = DimensionNameBuilder::new(_fbb); + if let Some(x) = args.name { + builder.add_name(x); + } + builder.finish() + } + + #[inline] + pub fn name(&self) -> Option<&'a str> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>( + DimensionName::VT_NAME, + None, + ) + } + } + } + + impl flatbuffers::Verifiable for DimensionName<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, + pos: usize, + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .visit_field::>( + "name", + Self::VT_NAME, + false, + )? + .finish(); + Ok(()) + } + } + pub struct DimensionNameArgs<'a> { + pub name: Option>, + } + impl<'a> Default for DimensionNameArgs<'a> { + #[inline] + fn default() -> Self { + DimensionNameArgs { name: None } + } + } + + pub struct DimensionNameBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, + } + impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> DimensionNameBuilder<'a, 'b, A> { + #[inline] + pub fn add_name(&mut self, name: flatbuffers::WIPOffset<&'b str>) { + self.fbb_.push_slot_always::>( + DimensionName::VT_NAME, + name, + ); + } + #[inline] + pub fn new( + _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + ) -> DimensionNameBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + DimensionNameBuilder { fbb_: _fbb, start_: start } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + flatbuffers::WIPOffset::new(o.value()) + } + } + + impl core::fmt::Debug for DimensionName<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("DimensionName"); + ds.field("name", &self.name()); + ds.finish() + } + } pub enum GroupNodeDataOffset {} #[derive(Copy, Clone, PartialEq)] @@ -1937,8 +1753,9 @@ pub mod gen { } impl<'a> ArrayNodeData<'a> { - pub const VT_ZARR_METADATA: flatbuffers::VOffsetT = 4; - pub const VT_MANIFESTS: flatbuffers::VOffsetT = 6; + pub const VT_SHAPE: flatbuffers::VOffsetT = 4; + pub const VT_DIMENSION_NAMES: flatbuffers::VOffsetT = 6; + pub const VT_MANIFESTS: flatbuffers::VOffsetT = 8; #[inline] pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { @@ -1958,24 +1775,37 @@ pub mod gen { if let Some(x) = args.manifests { builder.add_manifests(x); } - if let Some(x) = args.zarr_metadata { - builder.add_zarr_metadata(x); + if let Some(x) = args.dimension_names { + builder.add_dimension_names(x); + } + if let Some(x) = args.shape { + builder.add_shape(x); } builder.finish() } #[inline] - pub fn zarr_metadata(&self) -> flatbuffers::Vector<'a, u8> { + pub fn shape(&self) -> flatbuffers::Vector<'a, DimensionShape> { // Safety: // Created from valid Table for this object // which contains a valid value in this slot unsafe { - self._tab - .get::>>( - ArrayNodeData::VT_ZARR_METADATA, - None, - ) - .unwrap() + 
self._tab.get::>>(ArrayNodeData::VT_SHAPE, None).unwrap() + } + } + #[inline] + pub fn dimension_names( + &self, + ) -> Option< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + > { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>, + >>(ArrayNodeData::VT_DIMENSION_NAMES, None) } } #[inline] @@ -2007,20 +1837,21 @@ pub mod gen { ) -> Result<(), flatbuffers::InvalidFlatbuffer> { use self::flatbuffers::Verifiable; v.visit_table(pos)? - .visit_field::>>( - "zarr_metadata", - Self::VT_ZARR_METADATA, - true, - )? - .visit_field::>, - >>("manifests", Self::VT_MANIFESTS, true)? - .finish(); + .visit_field::>>("shape", Self::VT_SHAPE, true)? + .visit_field::>>>("dimension_names", Self::VT_DIMENSION_NAMES, false)? + .visit_field::>>>("manifests", Self::VT_MANIFESTS, true)? + .finish(); Ok(()) } } pub struct ArrayNodeDataArgs<'a> { - pub zarr_metadata: Option>>, + pub shape: + Option>>, + pub dimension_names: Option< + flatbuffers::WIPOffset< + flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, + >, + >, pub manifests: Option< flatbuffers::WIPOffset< flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, @@ -2031,8 +1862,9 @@ pub mod gen { #[inline] fn default() -> Self { ArrayNodeDataArgs { - zarr_metadata: None, // required field - manifests: None, // required field + shape: None, // required field + dimension_names: None, + manifests: None, // required field } } } @@ -2043,13 +1875,25 @@ pub mod gen { } impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ArrayNodeDataBuilder<'a, 'b, A> { #[inline] - pub fn add_zarr_metadata( + pub fn add_shape( + &mut self, + shape: flatbuffers::WIPOffset>, + ) { + self.fbb_.push_slot_always::>( + ArrayNodeData::VT_SHAPE, + shape, + ); + } + #[inline] + pub fn add_dimension_names( &mut self, - zarr_metadata: flatbuffers::WIPOffset>, + dimension_names: flatbuffers::WIPOffset< + flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset>>, + >, ) { self.fbb_.push_slot_always::>( - ArrayNodeData::VT_ZARR_METADATA, - zarr_metadata, + ArrayNodeData::VT_DIMENSION_NAMES, + dimension_names, ); } #[inline] @@ -2074,7 +1918,7 @@ pub mod gen { #[inline] pub fn finish(self) -> flatbuffers::WIPOffset> { let o = self.fbb_.end_table(self.start_); - self.fbb_.required(o, ArrayNodeData::VT_ZARR_METADATA, "zarr_metadata"); + self.fbb_.required(o, ArrayNodeData::VT_SHAPE, "shape"); self.fbb_.required(o, ArrayNodeData::VT_MANIFESTS, "manifests"); flatbuffers::WIPOffset::new(o.value()) } @@ -2083,7 +1927,8 @@ pub mod gen { impl core::fmt::Debug for ArrayNodeData<'_> { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let mut ds = f.debug_struct("ArrayNodeData"); - ds.field("zarr_metadata", &self.zarr_metadata()); + ds.field("shape", &self.shape()); + ds.field("dimension_names", &self.dimension_names()); ds.field("manifests", &self.manifests()); ds.finish() } @@ -2106,10 +1951,9 @@ pub mod gen { impl<'a> NodeSnapshot<'a> { pub const VT_ID: flatbuffers::VOffsetT = 4; pub const VT_PATH: flatbuffers::VOffsetT = 6; - pub const VT_USER_ATTRIBUTES_TYPE: flatbuffers::VOffsetT = 8; - pub const VT_USER_ATTRIBUTES: flatbuffers::VOffsetT = 10; - pub const VT_NODE_DATA_TYPE: flatbuffers::VOffsetT = 12; - pub const VT_NODE_DATA: flatbuffers::VOffsetT = 14; + pub const VT_USER_DATA: flatbuffers::VOffsetT = 8; + pub const VT_NODE_DATA_TYPE: flatbuffers::VOffsetT = 10; + pub const VT_NODE_DATA: flatbuffers::VOffsetT = 12; #[inline] pub unsafe fn init_from_table(table: 
flatbuffers::Table<'a>) -> Self { @@ -2129,8 +1973,8 @@ pub mod gen { if let Some(x) = args.node_data { builder.add_node_data(x); } - if let Some(x) = args.user_attributes { - builder.add_user_attributes(x); + if let Some(x) = args.user_data { + builder.add_user_data(x); } if let Some(x) = args.path { builder.add_path(x); @@ -2139,7 +1983,6 @@ pub mod gen { builder.add_id(x); } builder.add_node_data_type(args.node_data_type); - builder.add_user_attributes_type(args.user_attributes_type); builder.finish() } @@ -2165,32 +2008,20 @@ pub mod gen { } } #[inline] - pub fn user_attributes_type(&self) -> UserAttributesSnapshot { + pub fn user_data(&self) -> flatbuffers::Vector<'a, u8> { // Safety: // Created from valid Table for this object // which contains a valid value in this slot unsafe { self._tab - .get::( - NodeSnapshot::VT_USER_ATTRIBUTES_TYPE, - Some(UserAttributesSnapshot::NONE), + .get::>>( + NodeSnapshot::VT_USER_DATA, + None, ) .unwrap() } } #[inline] - pub fn user_attributes(&self) -> Option> { - // Safety: - // Created from valid Table for this object - // which contains a valid value in this slot - unsafe { - self._tab.get::>>( - NodeSnapshot::VT_USER_ATTRIBUTES, - None, - ) - } - } - #[inline] pub fn node_data_type(&self) -> NodeData { // Safety: // Created from valid Table for this object @@ -2218,36 +2049,6 @@ pub mod gen { .unwrap() } } - #[inline] - #[allow(non_snake_case)] - pub fn user_attributes_as_inline(&self) -> Option> { - if self.user_attributes_type() == UserAttributesSnapshot::Inline { - self.user_attributes().map(|t| { - // Safety: - // Created from a valid Table for this object - // Which contains a valid union in this slot - unsafe { InlineUserAttributes::init_from_table(t) } - }) - } else { - None - } - } - - #[inline] - #[allow(non_snake_case)] - pub fn user_attributes_as_reference(&self) -> Option> { - if self.user_attributes_type() == UserAttributesSnapshot::Reference { - self.user_attributes().map(|t| { - // Safety: - // Created from a valid Table for this object - // Which contains a valid union in this slot - unsafe { UserAttributesRef::init_from_table(t) } - }) - } else { - None - } - } - #[inline] #[allow(non_snake_case)] pub fn node_data_as_array(&self) -> Option> { @@ -2287,13 +2088,7 @@ pub mod gen { v.visit_table(pos)? .visit_field::("id", Self::VT_ID, true)? .visit_field::>("path", Self::VT_PATH, true)? - .visit_union::("user_attributes_type", Self::VT_USER_ATTRIBUTES_TYPE, "user_attributes", Self::VT_USER_ATTRIBUTES, false, |key, v, pos| { - match key { - UserAttributesSnapshot::Inline => v.verify_union_variant::>("UserAttributesSnapshot::Inline", pos), - UserAttributesSnapshot::Reference => v.verify_union_variant::>("UserAttributesSnapshot::Reference", pos), - _ => Ok(()), - } - })? + .visit_field::>>("user_data", Self::VT_USER_DATA, true)? 
.visit_union::("node_data_type", Self::VT_NODE_DATA_TYPE, "node_data", Self::VT_NODE_DATA, true, |key, v, pos| { match key { NodeData::Array => v.verify_union_variant::>("NodeData::Array", pos), @@ -2308,8 +2103,7 @@ pub mod gen { pub struct NodeSnapshotArgs<'a> { pub id: Option<&'a ObjectId8>, pub path: Option>, - pub user_attributes_type: UserAttributesSnapshot, - pub user_attributes: Option>, + pub user_data: Option>>, pub node_data_type: NodeData, pub node_data: Option>, } @@ -2317,10 +2111,9 @@ pub mod gen { #[inline] fn default() -> Self { NodeSnapshotArgs { - id: None, // required field - path: None, // required field - user_attributes_type: UserAttributesSnapshot::NONE, - user_attributes: None, + id: None, // required field + path: None, // required field + user_data: None, // required field node_data_type: NodeData::NONE, node_data: None, // required field } @@ -2344,24 +2137,13 @@ pub mod gen { ); } #[inline] - pub fn add_user_attributes_type( - &mut self, - user_attributes_type: UserAttributesSnapshot, - ) { - self.fbb_.push_slot::( - NodeSnapshot::VT_USER_ATTRIBUTES_TYPE, - user_attributes_type, - UserAttributesSnapshot::NONE, - ); - } - #[inline] - pub fn add_user_attributes( + pub fn add_user_data( &mut self, - user_attributes: flatbuffers::WIPOffset, + user_data: flatbuffers::WIPOffset>, ) { self.fbb_.push_slot_always::>( - NodeSnapshot::VT_USER_ATTRIBUTES, - user_attributes, + NodeSnapshot::VT_USER_DATA, + user_data, ); } #[inline] @@ -2394,6 +2176,7 @@ pub mod gen { let o = self.fbb_.end_table(self.start_); self.fbb_.required(o, NodeSnapshot::VT_ID, "id"); self.fbb_.required(o, NodeSnapshot::VT_PATH, "path"); + self.fbb_.required(o, NodeSnapshot::VT_USER_DATA, "user_data"); self.fbb_.required(o, NodeSnapshot::VT_NODE_DATA, "node_data"); flatbuffers::WIPOffset::new(o.value()) } @@ -2404,27 +2187,7 @@ pub mod gen { let mut ds = f.debug_struct("NodeSnapshot"); ds.field("id", &self.id()); ds.field("path", &self.path()); - ds.field("user_attributes_type", &self.user_attributes_type()); - match self.user_attributes_type() { - UserAttributesSnapshot::Inline => { - if let Some(x) = self.user_attributes_as_inline() { - ds.field("user_attributes", &x) - } else { - ds.field("user_attributes", &"InvalidFlatbuffer: Union discriminant does not match value.") - } - } - UserAttributesSnapshot::Reference => { - if let Some(x) = self.user_attributes_as_reference() { - ds.field("user_attributes", &x) - } else { - ds.field("user_attributes", &"InvalidFlatbuffer: Union discriminant does not match value.") - } - } - _ => { - let x: Option<()> = None; - ds.field("user_attributes", &x) - } - }; + ds.field("user_data", &self.user_data()); ds.field("node_data_type", &self.node_data_type()); match self.node_data_type() { NodeData::Array => { @@ -2472,7 +2235,6 @@ pub mod gen { pub const VT_MESSAGE: flatbuffers::VOffsetT = 12; pub const VT_METADATA: flatbuffers::VOffsetT = 14; pub const VT_MANIFEST_FILES: flatbuffers::VOffsetT = 16; - pub const VT_ATTRIBUTE_FILES: flatbuffers::VOffsetT = 18; #[inline] pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { @@ -2490,9 +2252,6 @@ pub mod gen { ) -> flatbuffers::WIPOffset> { let mut builder = SnapshotBuilder::new(_fbb); builder.add_flushed_at(args.flushed_at); - if let Some(x) = args.attribute_files { - builder.add_attribute_files(x); - } if let Some(x) = args.manifest_files { builder.add_manifest_files(x); } @@ -2597,19 +2356,6 @@ pub mod gen { .unwrap() } } - #[inline] - pub fn attribute_files(&self) -> flatbuffers::Vector<'a, 
AttributeFileInfo> { - // Safety: - // Created from valid Table for this object - // which contains a valid value in this slot - unsafe { - self._tab - .get::, - >>(Snapshot::VT_ATTRIBUTE_FILES, None) - .unwrap() - } - } } impl flatbuffers::Verifiable for Snapshot<'_> { @@ -2627,7 +2373,6 @@ pub mod gen { .visit_field::>("message", Self::VT_MESSAGE, true)? .visit_field::>>>("metadata", Self::VT_METADATA, true)? .visit_field::>>("manifest_files", Self::VT_MANIFEST_FILES, true)? - .visit_field::>>("attribute_files", Self::VT_ATTRIBUTE_FILES, true)? .finish(); Ok(()) } @@ -2649,8 +2394,6 @@ pub mod gen { >, pub manifest_files: Option>>, - pub attribute_files: - Option>>, } impl<'a> Default for SnapshotArgs<'a> { #[inline] @@ -2660,10 +2403,9 @@ pub mod gen { parent_id: None, nodes: None, // required field flushed_at: 0, - message: None, // required field - metadata: None, // required field - manifest_files: None, // required field - attribute_files: None, // required field + message: None, // required field + metadata: None, // required field + manifest_files: None, // required field } } } @@ -2727,18 +2469,6 @@ pub mod gen { ); } #[inline] - pub fn add_attribute_files( - &mut self, - attribute_files: flatbuffers::WIPOffset< - flatbuffers::Vector<'b, AttributeFileInfo>, - >, - ) { - self.fbb_.push_slot_always::>( - Snapshot::VT_ATTRIBUTE_FILES, - attribute_files, - ); - } - #[inline] pub fn new( _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, ) -> SnapshotBuilder<'a, 'b, A> { @@ -2753,7 +2483,6 @@ pub mod gen { self.fbb_.required(o, Snapshot::VT_MESSAGE, "message"); self.fbb_.required(o, Snapshot::VT_METADATA, "metadata"); self.fbb_.required(o, Snapshot::VT_MANIFEST_FILES, "manifest_files"); - self.fbb_.required(o, Snapshot::VT_ATTRIBUTE_FILES, "attribute_files"); flatbuffers::WIPOffset::new(o.value()) } } @@ -2768,7 +2497,6 @@ pub mod gen { ds.field("message", &self.message()); ds.field("metadata", &self.metadata()); ds.field("manifest_files", &self.manifest_files()); - ds.field("attribute_files", &self.attribute_files()); ds.finish() } } @@ -3063,8 +2791,8 @@ pub mod gen { pub const VT_NEW_ARRAYS: flatbuffers::VOffsetT = 8; pub const VT_DELETED_GROUPS: flatbuffers::VOffsetT = 10; pub const VT_DELETED_ARRAYS: flatbuffers::VOffsetT = 12; - pub const VT_UPDATED_USER_ATTRIBUTES: flatbuffers::VOffsetT = 14; - pub const VT_UPDATED_ZARR_METADATA: flatbuffers::VOffsetT = 16; + pub const VT_UPDATED_ARRAYS: flatbuffers::VOffsetT = 14; + pub const VT_UPDATED_GROUPS: flatbuffers::VOffsetT = 16; pub const VT_UPDATED_CHUNKS: flatbuffers::VOffsetT = 18; #[inline] @@ -3085,11 +2813,11 @@ pub mod gen { if let Some(x) = args.updated_chunks { builder.add_updated_chunks(x); } - if let Some(x) = args.updated_zarr_metadata { - builder.add_updated_zarr_metadata(x); + if let Some(x) = args.updated_groups { + builder.add_updated_groups(x); } - if let Some(x) = args.updated_user_attributes { - builder.add_updated_user_attributes(x); + if let Some(x) = args.updated_arrays { + builder.add_updated_arrays(x); } if let Some(x) = args.deleted_arrays { builder.add_deleted_arrays(x); @@ -3153,21 +2881,21 @@ pub mod gen { } } #[inline] - pub fn updated_user_attributes(&self) -> flatbuffers::Vector<'a, ObjectId8> { + pub fn updated_arrays(&self) -> flatbuffers::Vector<'a, ObjectId8> { // Safety: // Created from valid Table for this object // which contains a valid value in this slot unsafe { - self._tab.get::>>(TransactionLog::VT_UPDATED_USER_ATTRIBUTES, None).unwrap() + 
self._tab.get::>>(TransactionLog::VT_UPDATED_ARRAYS, None).unwrap() } } #[inline] - pub fn updated_zarr_metadata(&self) -> flatbuffers::Vector<'a, ObjectId8> { + pub fn updated_groups(&self) -> flatbuffers::Vector<'a, ObjectId8> { // Safety: // Created from valid Table for this object // which contains a valid value in this slot unsafe { - self._tab.get::>>(TransactionLog::VT_UPDATED_ZARR_METADATA, None).unwrap() + self._tab.get::>>(TransactionLog::VT_UPDATED_GROUPS, None).unwrap() } } #[inline] @@ -3204,8 +2932,8 @@ pub mod gen { .visit_field::>>("new_arrays", Self::VT_NEW_ARRAYS, true)? .visit_field::>>("deleted_groups", Self::VT_DELETED_GROUPS, true)? .visit_field::>>("deleted_arrays", Self::VT_DELETED_ARRAYS, true)? - .visit_field::>>("updated_user_attributes", Self::VT_UPDATED_USER_ATTRIBUTES, true)? - .visit_field::>>("updated_zarr_metadata", Self::VT_UPDATED_ZARR_METADATA, true)? + .visit_field::>>("updated_arrays", Self::VT_UPDATED_ARRAYS, true)? + .visit_field::>>("updated_groups", Self::VT_UPDATED_GROUPS, true)? .visit_field::>>>("updated_chunks", Self::VT_UPDATED_CHUNKS, true)? .finish(); Ok(()) @@ -3221,9 +2949,9 @@ pub mod gen { Option>>, pub deleted_arrays: Option>>, - pub updated_user_attributes: + pub updated_arrays: Option>>, - pub updated_zarr_metadata: + pub updated_groups: Option>>, pub updated_chunks: Option< flatbuffers::WIPOffset< @@ -3238,14 +2966,14 @@ pub mod gen { #[inline] fn default() -> Self { TransactionLogArgs { - id: None, // required field - new_groups: None, // required field - new_arrays: None, // required field - deleted_groups: None, // required field - deleted_arrays: None, // required field - updated_user_attributes: None, // required field - updated_zarr_metadata: None, // required field - updated_chunks: None, // required field + id: None, // required field + new_groups: None, // required field + new_arrays: None, // required field + deleted_groups: None, // required field + deleted_arrays: None, // required field + updated_arrays: None, // required field + updated_groups: None, // required field + updated_chunks: None, // required field } } } @@ -3300,27 +3028,23 @@ pub mod gen { ); } #[inline] - pub fn add_updated_user_attributes( + pub fn add_updated_arrays( &mut self, - updated_user_attributes: flatbuffers::WIPOffset< - flatbuffers::Vector<'b, ObjectId8>, - >, + updated_arrays: flatbuffers::WIPOffset>, ) { self.fbb_.push_slot_always::>( - TransactionLog::VT_UPDATED_USER_ATTRIBUTES, - updated_user_attributes, + TransactionLog::VT_UPDATED_ARRAYS, + updated_arrays, ); } #[inline] - pub fn add_updated_zarr_metadata( + pub fn add_updated_groups( &mut self, - updated_zarr_metadata: flatbuffers::WIPOffset< - flatbuffers::Vector<'b, ObjectId8>, - >, + updated_groups: flatbuffers::WIPOffset>, ) { self.fbb_.push_slot_always::>( - TransactionLog::VT_UPDATED_ZARR_METADATA, - updated_zarr_metadata, + TransactionLog::VT_UPDATED_GROUPS, + updated_groups, ); } #[inline] @@ -3353,16 +3077,8 @@ pub mod gen { self.fbb_.required(o, TransactionLog::VT_NEW_ARRAYS, "new_arrays"); self.fbb_.required(o, TransactionLog::VT_DELETED_GROUPS, "deleted_groups"); self.fbb_.required(o, TransactionLog::VT_DELETED_ARRAYS, "deleted_arrays"); - self.fbb_.required( - o, - TransactionLog::VT_UPDATED_USER_ATTRIBUTES, - "updated_user_attributes", - ); - self.fbb_.required( - o, - TransactionLog::VT_UPDATED_ZARR_METADATA, - "updated_zarr_metadata", - ); + self.fbb_.required(o, TransactionLog::VT_UPDATED_ARRAYS, "updated_arrays"); + self.fbb_.required(o, 
TransactionLog::VT_UPDATED_GROUPS, "updated_groups"); self.fbb_.required(o, TransactionLog::VT_UPDATED_CHUNKS, "updated_chunks"); flatbuffers::WIPOffset::new(o.value()) } @@ -3376,8 +3092,8 @@ pub mod gen { ds.field("new_arrays", &self.new_arrays()); ds.field("deleted_groups", &self.deleted_groups()); ds.field("deleted_arrays", &self.deleted_arrays()); - ds.field("updated_user_attributes", &self.updated_user_attributes()); - ds.field("updated_zarr_metadata", &self.updated_zarr_metadata()); + ds.field("updated_arrays", &self.updated_arrays()); + ds.field("updated_groups", &self.updated_groups()); ds.field("updated_chunks", &self.updated_chunks()); ds.finish() } diff --git a/icechunk/src/format/mod.rs b/icechunk/src/format/mod.rs index 202820e7..dcb2c877 100644 --- a/icechunk/src/format/mod.rs +++ b/icechunk/src/format/mod.rs @@ -19,7 +19,7 @@ use serde_with::{serde_as, TryFromInto}; use thiserror::Error; use typed_path::Utf8UnixPathBuf; -use crate::{error::ICError, metadata::DataType, private}; +use crate::{error::ICError, private}; pub mod attributes; pub mod manifest; @@ -241,11 +241,6 @@ pub type TableOffset = u32; pub enum IcechunkFormatErrorKind { #[error(transparent)] VirtualReferenceError(VirtualReferenceErrorKind), - - #[error("error decoding fill_value from array. Found size: {found_size}, target size: {target_size}, type: {target_type}")] - FillValueDecodeError { found_size: usize, target_size: usize, target_type: DataType }, - #[error("error decoding fill_value from json. Type: {data_type}, value: {value}")] - FillValueParse { data_type: DataType, value: serde_json::Value }, #[error("node not found at `{path:?}`")] NodeNotFound { path: Path }, #[error("chunk coordinates not found `{coords:?}`")] diff --git a/icechunk/src/format/snapshot.rs b/icechunk/src/format/snapshot.rs index 13aeaac6..c6ebcbfb 100644 --- a/icechunk/src/format/snapshot.rs +++ b/icechunk/src/format/snapshot.rs @@ -1,5 +1,6 @@ -use std::{collections::BTreeMap, convert::Infallible, sync::Arc}; +use std::{collections::BTreeMap, convert::Infallible, num::NonZeroU64, sync::Arc}; +use bytes::Bytes; use chrono::{DateTime, Utc}; use err_into::ErrorInto; use flatbuffers::{FlatBufferBuilder, VerifierOptions}; @@ -7,68 +8,44 @@ use itertools::Itertools as _; use serde::{Deserialize, Serialize}; use serde_json::Value; -use crate::metadata::{ - ArrayShape, ChunkKeyEncoding, ChunkShape, Codec, DataType, DimensionNames, FillValue, - StorageTransformer, UserAttributes, -}; - use super::{ flatbuffers::gen, manifest::{Manifest, ManifestExtents, ManifestRef}, AttributesId, ChunkIndices, IcechunkFormatError, IcechunkFormatErrorKind, - IcechunkResult, ManifestId, NodeId, Path, SnapshotId, TableOffset, + IcechunkResult, ManifestId, NodeId, Path, SnapshotId, }; #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct UserAttributesRef { - pub object_id: AttributesId, - pub location: TableOffset, -} - -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub enum UserAttributesSnapshot { - Inline(UserAttributes), - Ref(UserAttributesRef), -} - -#[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub enum NodeType { - Group, - Array, +pub struct DimensionShape { + dim_length: u64, + chunk_length: u64, } -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct ZarrArrayMetadata { - pub shape: ArrayShape, - pub data_type: DataType, - pub chunk_shape: ChunkShape, - pub chunk_key_encoding: ChunkKeyEncoding, - pub fill_value: FillValue, - pub codecs: Vec, - pub storage_transformers: Option>, - pub 
dimension_names: Option, +impl DimensionShape { + pub fn new(array_length: u64, chunk_length: NonZeroU64) -> Self { + Self { dim_length: array_length, chunk_length: chunk_length.get() } + } + pub fn array_length(&self) -> u64 { + self.dim_length + } + pub fn chunk_length(&self) -> u64 { + self.chunk_length + } } -impl ZarrArrayMetadata { - /// Returns an iterator over the maximum permitted chunk indices for the array. - /// - /// This function calculates the maximum chunk indices based on the shape of the array - /// and the chunk shape, using (shape - 1) / chunk_shape. Given integer division is truncating, - /// this will always result in proper indices at the boundaries. - /// - /// # Returns - /// - /// A ChunkIndices type containing the max chunk index for each dimension. - fn max_chunk_indices_permitted(&self) -> ChunkIndices { - debug_assert_eq!(self.shape.len(), self.chunk_shape.0.len()); +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ArrayShape(Vec); - ChunkIndices( - self.shape - .iter() - .zip(self.chunk_shape.0.iter()) - .map(|(s, cs)| if *s == 0 { 0 } else { ((s - 1) / cs.get()) as u32 }) - .collect(), - ) +impl ArrayShape { + pub fn new(it: I) -> Option + where + I: IntoIterator, + { + let v = it.into_iter().map(|(al, cl)| { + let cl = NonZeroU64::new(cl)?; + Some(DimensionShape::new(al, cl)) + }); + v.collect::>>().map(Self) } /// Validates the provided chunk coordinates for the array. @@ -87,27 +64,75 @@ impl ZarrArrayMetadata { /// /// Returns false if the chunk coordinates are invalid. pub fn valid_chunk_coord(&self, coord: &ChunkIndices) -> bool { - debug_assert_eq!(self.shape.len(), coord.0.len()); - coord .0 .iter() - .zip(self.max_chunk_indices_permitted().0) + .zip(self.max_chunk_indices_permitted()) .all(|(index, index_permitted)| *index <= index_permitted) } + + /// Returns an iterator over the maximum permitted chunk indices for the array. + /// + /// This function calculates the maximum chunk indices based on the shape of the array + /// and the chunk shape, using (shape - 1) / chunk_shape. Given integer division is truncating, + /// this will always result in proper indices at the boundaries. 
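/// # Example (illustrative sketch, not part of this change)
///
/// Assuming the `ArrayShape` and `ChunkIndices` types used in this diff: a
/// dimension of length 10 with chunk length 3 has chunks covering 0..3, 3..6,
/// 6..9 and 9..10, so the last valid chunk index is (10 - 1) / 3 = 3.
///
/// ```ignore
/// let shape = ArrayShape::new(vec![(10u64, 3), (20, 2), (30, 1)]).unwrap();
/// // max permitted indices per dimension: (10-1)/3 = 3, (20-1)/2 = 9, (30-1)/1 = 29
/// assert!(shape.valid_chunk_coord(&ChunkIndices(vec![3, 9, 29])));
/// assert!(!shape.valid_chunk_coord(&ChunkIndices(vec![4, 9, 29])));
/// ```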
+ fn max_chunk_indices_permitted(&self) -> impl Iterator + '_ { + self.0.iter().map(|dim_shape| { + if dim_shape.chunk_length == 0 || dim_shape.dim_length == 0 { + 0 + } else { + ((dim_shape.dim_length - 1) / dim_shape.chunk_length) as u32 + } + }) + } } -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub enum DimensionName { + NotSpecified, + Name(String), +} + +impl From> for DimensionName { + fn from(value: Option<&str>) -> Self { + match value { + Some(s) => s.into(), + None => DimensionName::NotSpecified, + } + } +} + +impl From<&str> for DimensionName { + fn from(value: &str) -> Self { + if value.is_empty() { + DimensionName::NotSpecified + } else { + DimensionName::Name(value.to_string()) + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum NodeData { - Array(ZarrArrayMetadata, Vec), + Array { + shape: ArrayShape, + dimension_names: Option>, + manifests: Vec, + }, + Group, +} + +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub enum NodeType { Group, + Array, } #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct NodeSnapshot { pub id: NodeId, pub path: Path, - pub user_attributes: Option, + pub user_data: Bytes, pub node_data: NodeData, } @@ -115,7 +140,7 @@ impl NodeSnapshot { pub fn node_type(&self) -> NodeType { match &self.node_data { NodeData::Group => NodeType::Group, - NodeData::Array(_, _) => NodeType::Array, + NodeData::Array { .. } => NodeType::Array, } } } @@ -147,14 +172,23 @@ impl<'a> From> for ManifestRef { } } -impl<'a> TryFrom> for NodeData { - type Error = rmp_serde::decode::Error; +impl From<&gen::DimensionShape> for DimensionShape { + fn from(value: &gen::DimensionShape) -> Self { + DimensionShape { + dim_length: value.array_length(), + chunk_length: value.chunk_length(), + } + } +} - fn try_from(value: gen::ArrayNodeData<'a>) -> Result { - // TODO: is it ok to call `bytes` here? 
Or do we need to collect an iterator - let meta = rmp_serde::from_slice(value.zarr_metadata().bytes())?; - let manifest_refs = value.manifests().iter().map(|m| m.into()).collect(); - Ok(Self::Array(meta, manifest_refs)) +impl<'a> From> for NodeData { + fn from(value: gen::ArrayNodeData<'a>) -> Self { + let dimension_names = value + .dimension_names() + .map(|dn| dn.iter().map(|name| name.name().into()).collect()); + let shape = ArrayShape(value.shape().iter().map(|dim| dim.into()).collect()); + let manifests = value.manifests().iter().map(|m| m.into()).collect(); + Self::Array { shape, dimension_names, manifests } } } @@ -164,62 +198,25 @@ impl<'a> From> for NodeData { } } -impl<'a> TryFrom> for UserAttributesSnapshot { - type Error = rmp_serde::decode::Error; - - fn try_from(value: gen::InlineUserAttributes<'a>) -> Result { - let parsed = rmp_serde::from_slice(value.data().bytes())?; - Ok(Self::Inline(UserAttributes { parsed })) - } -} - -impl<'a> From> for UserAttributesSnapshot { - fn from(value: gen::UserAttributesRef<'a>) -> Self { - Self::Ref(UserAttributesRef { - object_id: value.object_id().into(), - location: value.location(), - }) - } -} - impl<'a> TryFrom> for NodeSnapshot { type Error = IcechunkFormatError; fn try_from(value: gen::NodeSnapshot<'a>) -> Result { #[allow(clippy::expect_used, clippy::panic)] let node_data: NodeData = match value.node_data_type() { - gen::NodeData::Array => value - .node_data_as_array() - .expect("Bug in flatbuffers library") - .try_into()?, + gen::NodeData::Array => { + value.node_data_as_array().expect("Bug in flatbuffers library").into() + } gen::NodeData::Group => { value.node_data_as_group().expect("Bug in flatbuffers library").into() } x => panic!("Invalid node data type in flatbuffers file {:?}", x), }; - #[allow(clippy::expect_used, clippy::panic)] - let user_attributes: Option = - match value.user_attributes_type() { - gen::UserAttributesSnapshot::Inline => Some( - value - .user_attributes_as_inline() - .expect("Bug in flatbuffers library") - .try_into()?, - ), - gen::UserAttributesSnapshot::Reference => Some( - value - .user_attributes_as_reference() - .expect("Bug in flatbuffers library") - .into(), - ), - gen::UserAttributesSnapshot::NONE => None, - x => panic!("Invalid user attributes type in flatbuffers file {:?}", x), - }; let res = NodeSnapshot { id: value.id().into(), path: value.path().to_string().try_into()?, - user_attributes, node_data, + user_data: Bytes::copy_from_slice(value.user_data().bytes()), }; Ok(res) } @@ -235,12 +232,6 @@ impl From<&gen::ManifestFileInfo> for ManifestFileInfo { } } -impl From<&gen::AttributeFileInfo> for AttributeFileInfo { - fn from(value: &gen::AttributeFileInfo) -> Self { - Self { id: value.id().into() } - } -} - pub type SnapshotProperties = BTreeMap; #[derive(Debug, PartialEq, Serialize, Deserialize, Clone, Eq, Hash)] @@ -260,11 +251,6 @@ impl ManifestFileInfo { } } -#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)] -pub struct AttributeFileInfo { - pub id: AttributesId, -} - #[derive(Debug, PartialEq)] pub struct Snapshot { buffer: Vec, @@ -327,7 +313,6 @@ impl Snapshot { message: String, properties: Option, mut manifest_files: Vec, - mut attribute_files: Vec, sorted_iter: I, ) -> IcechunkResult where @@ -347,16 +332,6 @@ impl Snapshot { .collect::>(); let manifest_files = builder.create_vector(&manifest_files); - attribute_files.sort_by(|a, b| a.id.cmp(&b.id)); - let attribute_files = attribute_files - .iter() - .map(|att| { - let id = gen::ObjectId12::new(&att.id.0); - 
gen::AttributeFileInfo::new(&id) - }) - .collect::>(); - let attribute_files = builder.create_vector(&attribute_files); - let metadata_items: Vec<_> = properties .unwrap_or_default() .iter() @@ -393,7 +368,6 @@ impl Snapshot { message: Some(message), metadata: Some(metadata_items), manifest_files: Some(manifest_files), - attribute_files: Some(attribute_files), }, ); @@ -413,7 +387,6 @@ impl Snapshot { Self::INITIAL_COMMIT_MESSAGE.to_string(), Some(properties), Default::default(), - Default::default(), nodes, ) } @@ -475,10 +448,6 @@ impl Snapshot { self.root().manifest_files().iter().map(|mf| mf.into()) } - pub fn attribute_files(&self) -> impl Iterator + '_ { - self.root().attribute_files().iter().map(|f| f.into()) - } - /// Cretase a new `Snapshot` with all the same data as `new_child` but `self` as parent pub fn adopt(&self, new_child: &Snapshot) -> IcechunkResult { // Rust flatbuffers implementation doesn't allow mutation of scalars, so we need to @@ -490,7 +459,6 @@ impl Snapshot { new_child.message().clone(), Some(new_child.metadata()?.clone()), new_child.manifest_files().collect(), - new_child.attribute_files().collect(), new_child.iter(), ) } @@ -560,54 +528,20 @@ fn mk_node<'bldr>( ) -> IcechunkResult>> { let id = gen::ObjectId8::new(&node.id.0); let path = builder.create_string(node.path.to_string().as_str()); - let (user_attributes_type, user_attributes) = - mk_user_attributes(builder, node.user_attributes.as_ref())?; let (node_data_type, node_data) = mk_node_data(builder, &node.node_data)?; + let user_data = Some(builder.create_vector(&node.user_data)); Ok(gen::NodeSnapshot::create( builder, &gen::NodeSnapshotArgs { id: Some(&id), path: Some(path), - user_attributes_type, - user_attributes, node_data_type, node_data, + user_data, }, )) } -fn mk_user_attributes( - builder: &mut flatbuffers::FlatBufferBuilder<'_>, - atts: Option<&UserAttributesSnapshot>, -) -> IcechunkResult<( - gen::UserAttributesSnapshot, - Option>, -)> { - match atts { - Some(UserAttributesSnapshot::Inline(user_attributes)) => { - let data = builder - .create_vector(rmp_serde::to_vec(&user_attributes.parsed)?.as_slice()); - let inl = gen::InlineUserAttributes::create( - builder, - &gen::InlineUserAttributesArgs { data: Some(data) }, - ); - Ok((gen::UserAttributesSnapshot::Inline, Some(inl.as_union_value()))) - } - Some(UserAttributesSnapshot::Ref(uatts)) => { - let id = gen::ObjectId12::new(&uatts.object_id.0); - let reference = gen::UserAttributesRef::create( - builder, - &gen::UserAttributesRefArgs { - object_id: Some(&id), - location: uatts.location, - }, - ); - Ok((gen::UserAttributesSnapshot::Reference, Some(reference.as_union_value()))) - } - None => Ok((gen::UserAttributesSnapshot::NONE, None)), - } -} - fn mk_node_data( builder: &mut FlatBufferBuilder<'_>, node_data: &NodeData, @@ -616,9 +550,7 @@ fn mk_node_data( Option>, )> { match node_data { - NodeData::Array(zarr, manifests) => { - let zarr_metadata = - Some(builder.create_vector(rmp_serde::to_vec(zarr)?.as_slice())); + NodeData::Array { manifests, dimension_names, shape } => { let manifests = manifests .iter() .map(|manref| { @@ -639,14 +571,40 @@ fn mk_node_data( }) .collect::>(); let manifests = builder.create_vector(manifests.as_slice()); + let dimensions = dimension_names.as_ref().map(|dn| { + let names = dn + .iter() + .map(|n| match n { + DimensionName::Name(s) => { + let n = builder.create_shared_string(s.as_str()); + gen::DimensionName::create( + builder, + &gen::DimensionNameArgs { name: Some(n) }, + ) + } + DimensionName::NotSpecified 
=> gen::DimensionName::create( + builder, + &gen::DimensionNameArgs { name: None }, + ), + }) + .collect::>(); + builder.create_vector(names.as_slice()) + }); + let shape = shape + .0 + .iter() + .map(|ds| gen::DimensionShape::new(ds.dim_length, ds.chunk_length)) + .collect::>(); + let shape = builder.create_vector(shape.as_slice()); Ok(( gen::NodeData::Array, Some( gen::ArrayNodeData::create( builder, &gen::ArrayNodeDataArgs { - zarr_metadata, manifests: Some(manifests), + shape: Some(shape), + dimension_names: dimensions, }, ) .as_union_value(), @@ -670,54 +628,23 @@ mod tests { use super::*; use pretty_assertions::assert_eq; - use std::{ - collections::HashMap, - iter::{self}, - num::NonZeroU64, - }; + use std::iter::{self}; #[test] fn test_get_node() -> Result<(), Box> { - let zarr_meta1 = ZarrArrayMetadata { - shape: vec![10u64, 20, 30], - data_type: DataType::Float32, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(3).unwrap(), - NonZeroU64::new(2).unwrap(), - NonZeroU64::new(1).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Float32(0f32), - - codecs: vec![Codec { - name: "mycodec".to_string(), - configuration: Some(HashMap::from_iter(iter::once(( - "foo".to_string(), - serde_json::Value::from(42), - )))), - }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: Some(HashMap::from_iter(iter::once(( - "foo".to_string(), - serde_json::Value::from(42), - )))), - }]), - dimension_names: Some(vec![ - Some("x".to_string()), - Some("y".to_string()), - Some("t".to_string()), - ]), - }; - let zarr_meta2 = ZarrArrayMetadata { - storage_transformers: None, - data_type: DataType::Int32, - dimension_names: Some(vec![None, None, Some("t".to_string())]), - fill_value: FillValue::Int32(0i32), - ..zarr_meta1.clone() - }; - let zarr_meta3 = - ZarrArrayMetadata { dimension_names: None, ..zarr_meta2.clone() }; + let shape1 = ArrayShape::new(vec![(10u64, 3), (20, 2), (30, 1)]).unwrap(); + let dim_names1 = Some(vec!["x".into(), "y".into(), "t".into()]); + + let shape2 = shape1.clone(); + let dim_names2 = Some(vec![ + DimensionName::NotSpecified, + DimensionName::NotSpecified, + "t".into(), + ]); + + let shape3 = shape1.clone(); + let dim_names3 = None; + let man_ref1 = ManifestRef { object_id: ObjectId::random(), extents: ManifestExtents::new(&[0, 0, 0], &[100, 100, 100]), @@ -727,58 +654,61 @@ mod tests { extents: ManifestExtents::new(&[0, 0, 0], &[100, 100, 100]), }; - let oid = ObjectId::random(); let node_ids = iter::repeat_with(NodeId::random).take(7).collect::>(); // nodes must be sorted by path let nodes = vec![ NodeSnapshot { path: Path::root(), id: node_ids[0].clone(), - user_attributes: None, + user_data: Bytes::new(), node_data: NodeData::Group, }, NodeSnapshot { path: "/a".try_into().unwrap(), id: node_ids[1].clone(), - user_attributes: None, + user_data: Bytes::new(), node_data: NodeData::Group, }, NodeSnapshot { path: "/array2".try_into().unwrap(), id: node_ids[5].clone(), - user_attributes: None, - node_data: NodeData::Array(zarr_meta2.clone(), vec![]), + user_data: Bytes::new(), + node_data: NodeData::Array { + shape: shape2.clone(), + dimension_names: dim_names2.clone(), + manifests: vec![], + }, }, NodeSnapshot { path: "/b".try_into().unwrap(), id: node_ids[2].clone(), - user_attributes: None, + user_data: Bytes::new(), node_data: NodeData::Group, }, NodeSnapshot { path: "/b/array1".try_into().unwrap(), id: node_ids[4].clone(), - user_attributes: 
Some(UserAttributesSnapshot::Ref(UserAttributesRef { - object_id: oid.clone(), - location: 42, - })), - node_data: NodeData::Array( - zarr_meta1.clone(), - vec![man_ref1.clone(), man_ref2.clone()], - ), + user_data: Bytes::copy_from_slice(b"hello"), + node_data: NodeData::Array { + shape: shape1.clone(), + dimension_names: dim_names1.clone(), + manifests: vec![man_ref1.clone(), man_ref2.clone()], + }, }, NodeSnapshot { path: "/b/array3".try_into().unwrap(), id: node_ids[6].clone(), - user_attributes: None, - node_data: NodeData::Array(zarr_meta3.clone(), vec![]), + user_data: Bytes::new(), + node_data: NodeData::Array { + shape: shape3.clone(), + dimension_names: dim_names3.clone(), + manifests: vec![], + }, }, NodeSnapshot { path: "/b/c".try_into().unwrap(), id: node_ids[3].clone(), - user_attributes: Some(UserAttributesSnapshot::Inline( - UserAttributes::try_new(br#"{"foo": "some inline"}"#).unwrap(), - )), + user_data: Bytes::copy_from_slice(b"bye"), node_data: NodeData::Group, }, ]; @@ -801,7 +731,6 @@ mod tests { String::default(), Default::default(), manifests, - vec![], nodes.into_iter().map(Ok::), ) .unwrap(); @@ -822,9 +751,7 @@ mod tests { NodeSnapshot { path: "/b/c".try_into().unwrap(), id: node_ids[3].clone(), - user_attributes: Some(UserAttributesSnapshot::Inline( - UserAttributes::try_new(br#"{"foo": "some inline"}"#).unwrap(), - )), + user_data: Bytes::copy_from_slice(b"bye"), node_data: NodeData::Group, }, ); @@ -834,7 +761,7 @@ mod tests { NodeSnapshot { path: Path::root(), id: node_ids[0].clone(), - user_attributes: None, + user_data: Bytes::new(), node_data: NodeData::Group, }, ); @@ -844,11 +771,12 @@ mod tests { NodeSnapshot { path: "/b/array1".try_into().unwrap(), id: node_ids[4].clone(), - user_attributes: Some(UserAttributesSnapshot::Ref(UserAttributesRef { - object_id: oid, - location: 42, - })), - node_data: NodeData::Array(zarr_meta1.clone(), vec![man_ref1, man_ref2]), + user_data: Bytes::copy_from_slice(b"hello"), + node_data: NodeData::Array { + shape: shape1.clone(), + dimension_names: dim_names1.clone(), + manifests: vec![man_ref1, man_ref2] + }, }, ); let node = st.get_node(&"/array2".try_into().unwrap()).unwrap(); @@ -857,8 +785,12 @@ mod tests { NodeSnapshot { path: "/array2".try_into().unwrap(), id: node_ids[5].clone(), - user_attributes: None, - node_data: NodeData::Array(zarr_meta2.clone(), vec![]), + user_data: Bytes::new(), + node_data: NodeData::Array { + shape: shape2.clone(), + dimension_names: dim_names2.clone(), + manifests: vec![] + }, }, ); let node = st.get_node(&"/b/array3".try_into().unwrap()).unwrap(); @@ -867,8 +799,12 @@ mod tests { NodeSnapshot { path: "/b/array3".try_into().unwrap(), id: node_ids[6].clone(), - user_attributes: None, - node_data: NodeData::Array(zarr_meta3.clone(), vec![]), + user_data: Bytes::new(), + node_data: NodeData::Array { + shape: shape3.clone(), + dimension_names: dim_names3.clone(), + manifests: vec![] + }, }, ); Ok(()) @@ -876,45 +812,17 @@ mod tests { #[test] fn test_valid_chunk_coord() { - let zarr_meta1 = ZarrArrayMetadata { - shape: vec![10000, 10001, 9999], - data_type: DataType::Float32, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(1000).unwrap(), - NonZeroU64::new(1000).unwrap(), - NonZeroU64::new(1000).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Float32(0f32), - - codecs: vec![Codec { - name: "mycodec".to_string(), - configuration: Some(HashMap::from_iter(iter::once(( - "foo".to_string(), - serde_json::Value::from(42), - )))), - }], - 
storage_transformers: None, - dimension_names: None, - }; - - let zarr_meta2 = ZarrArrayMetadata { - shape: vec![0, 0, 0], - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(1000).unwrap(), - NonZeroU64::new(1000).unwrap(), - NonZeroU64::new(1000).unwrap(), - ]), - ..zarr_meta1.clone() - }; - + let shape1 = + ArrayShape::new(vec![(10_000, 1_000), (10_001, 1_000), (9_999, 1_000)]) + .unwrap(); + let shape2 = ArrayShape::new(vec![(0, 1_000), (0, 1_000), (0, 1_000)]).unwrap(); let coord1 = ChunkIndices(vec![9, 10, 9]); let coord2 = ChunkIndices(vec![10, 11, 10]); let coord3 = ChunkIndices(vec![0, 0, 0]); - assert!(zarr_meta1.valid_chunk_coord(&coord1)); - assert!(!zarr_meta1.valid_chunk_coord(&coord2)); + assert!(shape1.valid_chunk_coord(&coord1)); + assert!(!shape1.valid_chunk_coord(&coord2)); - assert!(zarr_meta2.valid_chunk_coord(&coord3)); + assert!(shape2.valid_chunk_coord(&coord3)); } } diff --git a/icechunk/src/format/transaction_log.rs b/icechunk/src/format/transaction_log.rs index 9c995d46..eb2b4540 100644 --- a/icechunk/src/format/transaction_log.rs +++ b/icechunk/src/format/transaction_log.rs @@ -30,12 +30,10 @@ impl TransactionLog { let mut deleted_arrays: Vec<_> = cs.deleted_arrays().map(|(_, id)| gen::ObjectId8::new(&id.0)).collect(); - let mut updated_user_attributes: Vec<_> = cs - .user_attributes_updated_nodes() - .map(|id| gen::ObjectId8::new(&id.0)) - .collect(); - let mut updated_zarr_metadata: Vec<_> = - cs.zarr_updated_arrays().map(|id| gen::ObjectId8::new(&id.0)).collect(); + let mut updated_arrays: Vec<_> = + cs.updated_arrays().map(|id| gen::ObjectId8::new(&id.0)).collect(); + let mut updated_groups: Vec<_> = + cs.updated_groups().map(|id| gen::ObjectId8::new(&id.0)).collect(); // TODO: what's a good capacity? let mut builder = flatbuffers::FlatBufferBuilder::with_capacity(1_024 * 1_024); @@ -70,17 +68,15 @@ impl TransactionLog { new_arrays.sort_by(|a, b| a.0.cmp(&b.0)); deleted_groups.sort_by(|a, b| a.0.cmp(&b.0)); deleted_arrays.sort_by(|a, b| a.0.cmp(&b.0)); - updated_user_attributes.sort_by(|a, b| a.0.cmp(&b.0)); - updated_zarr_metadata.sort_by(|a, b| a.0.cmp(&b.0)); + updated_groups.sort_by(|a, b| a.0.cmp(&b.0)); + updated_arrays.sort_by(|a, b| a.0.cmp(&b.0)); let new_groups = Some(builder.create_vector(new_groups.as_slice())); let new_arrays = Some(builder.create_vector(new_arrays.as_slice())); let deleted_groups = Some(builder.create_vector(deleted_groups.as_slice())); let deleted_arrays = Some(builder.create_vector(deleted_arrays.as_slice())); - let updated_user_attributes = - Some(builder.create_vector(updated_user_attributes.as_slice())); - let updated_zarr_metadata = - Some(builder.create_vector(updated_zarr_metadata.as_slice())); + let updated_groups = Some(builder.create_vector(updated_groups.as_slice())); + let updated_arrays = Some(builder.create_vector(updated_arrays.as_slice())); let id = ObjectId12::new(&id.0); let id = Some(&id); @@ -92,8 +88,8 @@ impl TransactionLog { new_arrays, deleted_groups, deleted_arrays, - updated_user_attributes, - updated_zarr_metadata, + updated_groups, + updated_arrays, updated_chunks, }, ); @@ -129,12 +125,12 @@ impl TransactionLog { self.root().deleted_arrays().iter().map(From::from) } - pub fn updated_user_attributes(&self) -> impl Iterator + '_ { - self.root().updated_user_attributes().iter().map(From::from) + pub fn updated_groups(&self) -> impl Iterator + '_ { + self.root().updated_groups().iter().map(From::from) } - pub fn updated_zarr_metadata(&self) -> impl Iterator + '_ { - 
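// --- Illustrative sketch; not part of the patch. ---
// Chunk-coordinate validation moved from ZarrArrayMetadata onto ArrayShape, as the
// test above exercises. ArrayShape::new takes (dim_length, chunk_length) pairs, so a
// 10_000 x 9_999 array chunked by 1_000 has valid chunk indices 0..=9 on each axis:
let shape = ArrayShape::new(vec![(10_000, 1_000), (9_999, 1_000)]).unwrap();
assert!(shape.valid_chunk_coord(&ChunkIndices(vec![9, 9])));
assert!(!shape.valid_chunk_coord(&ChunkIndices(vec![10, 9])));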
self.root().updated_zarr_metadata().iter().map(From::from) + pub fn updated_arrays(&self) -> impl Iterator + '_ { + self.root().updated_arrays().iter().map(From::from) } pub fn updated_chunks( @@ -179,11 +175,12 @@ impl TransactionLog { self.root().deleted_arrays().lookup_by_key(id.0, |a, b| a.0.cmp(b)).is_some() } - pub fn user_attributes_updated(&self, id: &NodeId) -> bool { - self.root() - .updated_user_attributes() - .lookup_by_key(id.0, |a, b| a.0.cmp(b)) - .is_some() + pub fn group_updated(&self, id: &NodeId) -> bool { + self.root().updated_groups().lookup_by_key(id.0, |a, b| a.0.cmp(b)).is_some() + } + + pub fn array_updated(&self, id: &NodeId) -> bool { + self.root().updated_arrays().lookup_by_key(id.0, |a, b| a.0.cmp(b)).is_some() } pub fn chunks_updated(&self, id: &NodeId) -> bool { @@ -193,13 +190,6 @@ impl TransactionLog { .is_some() } - pub fn zarr_metadata_updated(&self, id: &NodeId) -> bool { - self.root() - .updated_zarr_metadata() - .lookup_by_key(id.0, |a, b| a.0.cmp(b)) - .is_some() - } - fn root(&self) -> gen::TransactionLog { // without the unsafe version this is too slow // if we try to keep the root in the TransactionLog struct, we would need a lifetime @@ -216,8 +206,8 @@ impl TransactionLog { + root.new_arrays().len() + root.deleted_groups().len() + root.deleted_arrays().len() - + root.updated_user_attributes().len() - + root.updated_zarr_metadata().len() + + root.updated_groups().len() + + root.updated_arrays().len() + root.updated_chunks().iter().map(|s| s.chunks().len()).sum::() } @@ -240,8 +230,8 @@ pub struct DiffBuilder { new_arrays: HashSet, deleted_groups: HashSet, deleted_arrays: HashSet, - updated_user_attributes: HashSet, - updated_zarr_metadata: HashSet, + updated_groups: HashSet, + updated_arrays: HashSet, // we use sorted set here to simply move it to a diff without having to rebuild updated_chunks: HashMap>, } @@ -252,8 +242,8 @@ impl DiffBuilder { self.new_arrays.extend(tx.new_arrays()); self.deleted_groups.extend(tx.deleted_groups()); self.deleted_arrays.extend(tx.deleted_arrays()); - self.updated_user_attributes.extend(tx.updated_user_attributes()); - self.updated_zarr_metadata.extend(tx.updated_zarr_metadata()); + self.updated_groups.extend(tx.updated_groups()); + self.updated_arrays.extend(tx.updated_arrays()); for (node, chunks) in tx.updated_chunks() { match self.updated_chunks.get_mut(&node) { @@ -284,8 +274,8 @@ pub struct Diff { pub new_arrays: BTreeSet, pub deleted_groups: BTreeSet, pub deleted_arrays: BTreeSet, - pub updated_user_attributes: BTreeSet, - pub updated_zarr_metadata: BTreeSet, + pub updated_groups: BTreeSet, + pub updated_arrays: BTreeSet, pub updated_chunks: BTreeMap>, } @@ -315,14 +305,14 @@ impl Diff { .flat_map(|node_id| nodes.get(node_id)) .cloned() .collect(); - let updated_user_attributes = tx - .updated_user_attributes + let updated_groups = tx + .updated_groups .iter() .flat_map(|node_id| nodes.get(node_id)) .cloned() .collect(); - let updated_zarr_metadata = tx - .updated_zarr_metadata + let updated_arrays = tx + .updated_arrays .iter() .flat_map(|node_id| nodes.get(node_id)) .cloned() @@ -340,8 +330,8 @@ impl Diff { new_arrays, deleted_groups, deleted_arrays, - updated_user_attributes, - updated_zarr_metadata, + updated_groups, + updated_arrays, updated_chunks, } } @@ -352,8 +342,8 @@ impl Diff { && self.new_arrays.is_empty() && self.deleted_groups.is_empty() && self.deleted_arrays.is_empty() - && self.updated_user_attributes.is_empty() - && self.updated_user_attributes.is_empty() + && 
self.updated_groups.is_empty() + && self.updated_arrays.is_empty() && self.updated_chunks.is_empty() } } diff --git a/icechunk/src/lib.rs b/icechunk/src/lib.rs index e8308cb0..beaf4d40 100644 --- a/icechunk/src/lib.rs +++ b/icechunk/src/lib.rs @@ -23,7 +23,6 @@ pub mod config; pub mod conflicts; pub mod error; pub mod format; -pub mod metadata; pub mod ops; pub mod refs; pub mod repository; diff --git a/icechunk/src/metadata/data_type.rs b/icechunk/src/metadata/data_type.rs deleted file mode 100644 index 4349c5f2..00000000 --- a/icechunk/src/metadata/data_type.rs +++ /dev/null @@ -1,99 +0,0 @@ -use std::fmt::Display; - -use serde::{Deserialize, Serialize}; - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -#[non_exhaustive] -#[serde(rename_all = "lowercase")] -pub enum DataType { - Bool, - Int8, - Int16, - Int32, - Int64, - UInt8, - UInt16, - UInt32, - UInt64, - Float16, - Float32, - Float64, - Complex64, - Complex128, - String, - Bytes, -} - -impl DataType { - pub fn fits_i64(&self, n: i64) -> bool { - use DataType::*; - match self { - Int8 => n >= i8::MIN as i64 && n <= i8::MAX as i64, - Int16 => n >= i16::MIN as i64 && n <= i16::MAX as i64, - Int32 => n >= i32::MIN as i64 && n <= i32::MAX as i64, - Int64 => true, - _ => false, - } - } - - pub fn fits_u64(&self, n: u64) -> bool { - use DataType::*; - match self { - UInt8 => n >= u8::MIN as u64 && n <= u8::MAX as u64, - UInt16 => n >= u16::MIN as u64 && n <= u16::MAX as u64, - UInt32 => n >= u32::MIN as u64 && n <= u32::MAX as u64, - UInt64 => true, - _ => false, - } - } -} - -impl TryFrom<&str> for DataType { - type Error = &'static str; - - fn try_from(value: &str) -> Result { - match value { - "bool" => Ok(DataType::Bool), - "int8" => Ok(DataType::Int8), - "int16" => Ok(DataType::Int16), - "int32" => Ok(DataType::Int32), - "int64" => Ok(DataType::Int64), - "uint8" => Ok(DataType::UInt8), - "uint16" => Ok(DataType::UInt16), - "uint32" => Ok(DataType::UInt32), - "uint64" => Ok(DataType::UInt64), - "float16" => Ok(DataType::Float16), - "float32" => Ok(DataType::Float32), - "float64" => Ok(DataType::Float64), - "complex64" => Ok(DataType::Complex64), - "complex128" => Ok(DataType::Complex128), - "string" => Ok(DataType::String), - "bytes" => Ok(DataType::Bytes), - _ => Err("Unknown data type, cannot parse"), - } - } -} - -impl Display for DataType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use DataType::*; - match self { - Bool => f.write_str("bool"), - Int8 => f.write_str("int8"), - Int16 => f.write_str("int16"), - Int32 => f.write_str("int32"), - Int64 => f.write_str("int64"), - UInt8 => f.write_str("uint8"), - UInt16 => f.write_str("uint16"), - UInt32 => f.write_str("uint32"), - UInt64 => f.write_str("uint64"), - Float16 => f.write_str("float16"), - Float32 => f.write_str("float32"), - Float64 => f.write_str("float64"), - Complex64 => f.write_str("complex64"), - Complex128 => f.write_str("complex128"), - String => f.write_str("string"), - Bytes => f.write_str("bytes"), - } - } -} diff --git a/icechunk/src/metadata/fill_value.rs b/icechunk/src/metadata/fill_value.rs deleted file mode 100644 index e107d753..00000000 --- a/icechunk/src/metadata/fill_value.rs +++ /dev/null @@ -1,284 +0,0 @@ -use itertools::Itertools; -use serde::{Deserialize, Serialize}; -use test_strategy::Arbitrary; - -use crate::format::{IcechunkFormatError, IcechunkFormatErrorKind}; - -use super::DataType; - -#[derive(Arbitrary, Clone, Debug, PartialEq, Serialize, Deserialize)] -pub enum FillValue { - // FIXME: test all 
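// --- Illustrative sketch; not part of the patch. ---
// Referring to the transaction_log.rs changes above: Diff now reports metadata
// updates per node kind (updated_groups / updated_arrays) instead of the removed
// user-attribute / zarr-metadata sets. Assuming `diff` was produced by
// Repository::diff for a change that only touched the array at /array1:
let array1: Path = "/array1".try_into().unwrap();
assert!(diff.updated_arrays.contains(&array1)); // shape/dimension/user_data change on an array
assert!(diff.updated_groups.is_empty());        // no group definitions changed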
json (de)serializations - Bool(bool), - Int8(i8), - Int16(i16), - Int32(i32), - Int64(i64), - UInt8(u8), - UInt16(u16), - UInt32(u32), - UInt64(u64), - Float16(f32), - Float32(f32), - Float64(f64), - Complex64(f32, f32), - Complex128(f64, f64), - String(String), - Bytes(Vec), -} - -impl FillValue { - pub const NAN_STR: &'static str = "NaN"; - pub const INF_STR: &'static str = "Infinity"; - pub const NEG_INF_STR: &'static str = "-Infinity"; - - pub fn from_data_type_and_json( - dt: &DataType, - value: &serde_json::Value, - ) -> Result { - #![allow(clippy::expect_used)] // before calling `as_foo` we check with `fits_foo` - match (dt, value) { - (DataType::Bool, serde_json::Value::Bool(b)) => Ok(FillValue::Bool(*b)), - (DataType::Int8, serde_json::Value::Number(n)) - if n.as_i64().map(|n| dt.fits_i64(n)) == Some(true) => - { - Ok(FillValue::Int8( - n.as_i64().expect("bug in from_data_type_and_json") as i8 - )) - } - (DataType::Int16, serde_json::Value::Number(n)) - if n.as_i64().map(|n| dt.fits_i64(n)) == Some(true) => - { - Ok(FillValue::Int16( - n.as_i64().expect("bug in from_data_type_and_json") as i16 - )) - } - (DataType::Int32, serde_json::Value::Number(n)) - if n.as_i64().map(|n| dt.fits_i64(n)) == Some(true) => - { - Ok(FillValue::Int32( - n.as_i64().expect("bug in from_data_type_and_json") as i32 - )) - } - (DataType::Int64, serde_json::Value::Number(n)) - if n.as_i64().map(|n| dt.fits_i64(n)) == Some(true) => - { - Ok(FillValue::Int64(n.as_i64().expect("bug in from_data_type_and_json"))) - } - (DataType::UInt8, serde_json::Value::Number(n)) - if n.as_u64().map(|n| dt.fits_u64(n)) == Some(true) => - { - Ok(FillValue::UInt8( - n.as_u64().expect("bug in from_data_type_and_json") as u8 - )) - } - (DataType::UInt16, serde_json::Value::Number(n)) - if n.as_u64().map(|n| dt.fits_u64(n)) == Some(true) => - { - Ok(FillValue::UInt16( - n.as_u64().expect("bug in from_data_type_and_json") as u16 - )) - } - (DataType::UInt32, serde_json::Value::Number(n)) - if n.as_u64().map(|n| dt.fits_u64(n)) == Some(true) => - { - Ok(FillValue::UInt32( - n.as_u64().expect("bug in from_data_type_and_json") as u32 - )) - } - (DataType::UInt64, serde_json::Value::Number(n)) - if n.as_u64().map(|n| dt.fits_u64(n)) == Some(true) => - { - Ok(FillValue::UInt64(n.as_u64().expect("bug in from_data_type_and_json"))) - } - (DataType::Float16, serde_json::Value::Number(n)) if n.as_f64().is_some() => { - // FIXME: limits logic - Ok(FillValue::Float16( - n.as_f64().expect("bug in from_data_type_and_json") as f32, - )) - } - (DataType::Float16, serde_json::Value::String(s)) - if s.as_str() == FillValue::NAN_STR => - { - Ok(FillValue::Float16(f32::NAN)) - } - (DataType::Float16, serde_json::Value::String(s)) - if s.as_str() == FillValue::INF_STR => - { - Ok(FillValue::Float16(f32::INFINITY)) - } - (DataType::Float16, serde_json::Value::String(s)) - if s.as_str() == FillValue::NEG_INF_STR => - { - Ok(FillValue::Float16(f32::NEG_INFINITY)) - } - - (DataType::Float32, serde_json::Value::Number(n)) if n.as_f64().is_some() => { - // FIXME: limits logic - Ok(FillValue::Float32( - n.as_f64().expect("bug in from_data_type_and_json") as f32, - )) - } - (DataType::Float32, serde_json::Value::String(s)) - if s.as_str() == FillValue::NAN_STR => - { - Ok(FillValue::Float32(f32::NAN)) - } - (DataType::Float32, serde_json::Value::String(s)) - if s.as_str() == FillValue::INF_STR => - { - Ok(FillValue::Float32(f32::INFINITY)) - } - (DataType::Float32, serde_json::Value::String(s)) - if s.as_str() == FillValue::NEG_INF_STR => - { - 
Ok(FillValue::Float32(f32::NEG_INFINITY)) - } - - (DataType::Float64, serde_json::Value::Number(n)) if n.as_f64().is_some() => { - // FIXME: limits logic - Ok(FillValue::Float64( - n.as_f64().expect("bug in from_data_type_and_json"), - )) - } - (DataType::Float64, serde_json::Value::String(s)) - if s.as_str() == FillValue::NAN_STR => - { - Ok(FillValue::Float64(f64::NAN)) - } - (DataType::Float64, serde_json::Value::String(s)) - if s.as_str() == FillValue::INF_STR => - { - Ok(FillValue::Float64(f64::INFINITY)) - } - (DataType::Float64, serde_json::Value::String(s)) - if s.as_str() == FillValue::NEG_INF_STR => - { - Ok(FillValue::Float64(f64::NEG_INFINITY)) - } - (DataType::Complex64, serde_json::Value::Array(arr)) if arr.len() == 2 => { - let r = FillValue::from_data_type_and_json(&DataType::Float32, &arr[0])?; - let i = FillValue::from_data_type_and_json(&DataType::Float32, &arr[1])?; - match (r, i) { - (FillValue::Float32(r), FillValue::Float32(i)) => { - Ok(FillValue::Complex64(r, i)) - } - _ => Err(IcechunkFormatErrorKind::FillValueParse { - data_type: dt.clone(), - value: value.clone(), - } - .into()), - } - } - (DataType::Complex128, serde_json::Value::Array(arr)) if arr.len() == 2 => { - let r = FillValue::from_data_type_and_json(&DataType::Float64, &arr[0])?; - let i = FillValue::from_data_type_and_json(&DataType::Float64, &arr[1])?; - match (r, i) { - (FillValue::Float64(r), FillValue::Float64(i)) => { - Ok(FillValue::Complex128(r, i)) - } - _ => Err(IcechunkFormatErrorKind::FillValueParse { - data_type: dt.clone(), - value: value.clone(), - } - .into()), - } - } - - (DataType::String, serde_json::Value::String(s)) => { - Ok(FillValue::String(s.clone())) - } - - (DataType::Bytes, serde_json::Value::Array(arr)) => { - let bytes = arr - .iter() - .map(|b| FillValue::from_data_type_and_json(&DataType::UInt8, b)) - .collect::, _>>()?; - Ok(FillValue::Bytes( - bytes - .iter() - .map(|b| match b { - FillValue::UInt8(n) => Ok::<_, IcechunkFormatError>(*n), - _ => Err(IcechunkFormatErrorKind::FillValueParse { - data_type: dt.clone(), - value: value.clone(), - } - .into()), - }) - .try_collect()?, - )) - } - - _ => Err(IcechunkFormatErrorKind::FillValueParse { - data_type: dt.clone(), - value: value.clone(), - } - .into()), - } - } - - pub fn get_data_type(&self) -> DataType { - match self { - FillValue::Bool(_) => DataType::Bool, - FillValue::Int8(_) => DataType::Int8, - FillValue::Int16(_) => DataType::Int16, - FillValue::Int32(_) => DataType::Int32, - FillValue::Int64(_) => DataType::Int64, - FillValue::UInt8(_) => DataType::UInt8, - FillValue::UInt16(_) => DataType::UInt16, - FillValue::UInt32(_) => DataType::UInt32, - FillValue::UInt64(_) => DataType::UInt64, - FillValue::Float16(_) => DataType::Float16, - FillValue::Float32(_) => DataType::Float32, - FillValue::Float64(_) => DataType::Float64, - FillValue::Complex64(_, _) => DataType::Complex64, - FillValue::Complex128(_, _) => DataType::Complex128, - FillValue::String(_) => DataType::String, - FillValue::Bytes(_) => DataType::Bytes, - } - } -} - -#[cfg(test)] -#[allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)] -mod tests { - use super::*; - - #[test] - fn test_nan_inf_parsing() { - assert_eq!( - FillValue::from_data_type_and_json(&DataType::Float64, &55f64.into()) - .unwrap(), - FillValue::Float64(55.0) - ); - - assert_eq!( - FillValue::from_data_type_and_json(&DataType::Float64, &55.into()).unwrap(), - FillValue::Float64(55.0) - ); - - assert!(matches!(FillValue::from_data_type_and_json( - &DataType::Float64, - 
&"NaN".into() - ) - .unwrap(), - FillValue::Float64(n) if n.is_nan() - )); - - assert!(matches!(FillValue::from_data_type_and_json( - &DataType::Float64, - &"Infinity".into() - ) - .unwrap(), - FillValue::Float64(n) if n == f64::INFINITY - )); - - assert!(matches!(FillValue::from_data_type_and_json( - &DataType::Float64, - &"-Infinity".into() - ) - .unwrap(), - FillValue::Float64(n) if n == f64::NEG_INFINITY - )); - } -} diff --git a/icechunk/src/metadata/mod.rs b/icechunk/src/metadata/mod.rs deleted file mode 100644 index 4735b527..00000000 --- a/icechunk/src/metadata/mod.rs +++ /dev/null @@ -1,83 +0,0 @@ -use std::{collections::HashMap, num::NonZeroU64}; - -use bytes::Bytes; -use serde::{Deserialize, Serialize}; -use test_strategy::Arbitrary; - -pub mod data_type; -pub mod fill_value; - -pub use data_type::DataType; -pub use fill_value::FillValue; - -/// The shape of an array. -/// 0 is a valid shape member -pub type ArrayShape = Vec; -// each dimension name can be null in Zarr -pub type DimensionName = Option; -pub type DimensionNames = Vec; - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -pub struct Codec { - pub name: String, - pub configuration: Option>, -} - -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -pub struct StorageTransformer { - pub name: String, - pub configuration: Option>, -} - -#[derive(Clone, Eq, PartialEq, Debug, Serialize, Deserialize)] -pub struct ChunkShape(pub Vec); - -#[derive(Arbitrary, Copy, Clone, Eq, PartialEq, Debug, Serialize, Deserialize)] -pub enum ChunkKeyEncoding { - Slash, - Dot, - Default, -} - -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct UserAttributes { - #[serde(flatten)] - pub parsed: serde_json::Value, -} - -impl UserAttributes { - pub fn try_new(json: &[u8]) -> Result { - serde_json::from_slice(json).map(|json| UserAttributes { parsed: json }) - } - - pub fn to_bytes(&self) -> Bytes { - // We can unwrap because a Value is always valid json - #[allow(clippy::expect_used)] - serde_json::to_vec(&self.parsed) - .expect("Bug in UserAttributes serialization") - .into() - } -} - -impl TryFrom for ChunkKeyEncoding { - type Error = &'static str; - - fn try_from(value: u8) -> Result { - match value { - b'/' => Ok(ChunkKeyEncoding::Slash), - b'.' => Ok(ChunkKeyEncoding::Dot), - b'x' => Ok(ChunkKeyEncoding::Default), - _ => Err("Invalid chunk key encoding character"), - } - } -} - -impl From for u8 { - fn from(value: ChunkKeyEncoding) -> Self { - match value { - ChunkKeyEncoding::Slash => b'/', - ChunkKeyEncoding::Dot => b'.', - ChunkKeyEncoding::Default => b'x', - } - } -} diff --git a/icechunk/src/repository.rs b/icechunk/src/repository.rs index bd4f3657..86b35e9c 100644 --- a/icechunk/src/repository.rs +++ b/icechunk/src/repository.rs @@ -710,7 +710,7 @@ impl Repository { } Ok(node) => match node.node_data { NodeData::Group => {} - NodeData::Array(_, manifests) => { + NodeData::Array { manifests, .. 
} => { for manifest in manifests { if !loaded_manifests.contains(&manifest.object_id) { let manifest_id = manifest.object_id; @@ -823,9 +823,7 @@ pub async fn raise_if_invalid_snapshot_id( #[cfg(test)] #[allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)] mod tests { - use std::{ - collections::HashMap, error::Error, num::NonZeroU64, path::PathBuf, sync::Arc, - }; + use std::{collections::HashMap, error::Error, path::PathBuf, sync::Arc}; use storage::logging::LoggingStorage; use tempfile::TempDir; @@ -834,10 +832,7 @@ mod tests { config::{ CachingConfig, ManifestConfig, ManifestPreloadConfig, RepositoryConfig, }, - format::{manifest::ChunkPayload, snapshot::ZarrArrayMetadata, ChunkIndices}, - metadata::{ - ChunkKeyEncoding, ChunkShape, Codec, DataType, FillValue, StorageTransformer, - }, + format::{manifest::ChunkPayload, snapshot::ArrayShape, ChunkIndices}, new_local_filesystem_storage, storage::new_in_memory_storage, Repository, Storage, @@ -1030,34 +1025,48 @@ mod tests { let mut session = repository.writable_session("main").await?; - session.add_group(Path::root()).await?; - - let zarr_meta = ZarrArrayMetadata { - shape: vec![1_000, 1, 1], - data_type: DataType::Float16, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Float16(f32::NEG_INFINITY), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![Some("t".to_string())]), - }; + let def = Bytes::from_static(br#"{"this":"array"}"#); + session.add_group(Path::root(), def.clone()).await?; + + let shape = ArrayShape::new(vec![(1_000, 1), (1, 1), (1, 1)]).unwrap(); + let dimension_names = Some(vec!["t".into()]); let time_path: Path = "/time".try_into().unwrap(); let temp_path: Path = "/temperature".try_into().unwrap(); let lat_path: Path = "/latitude".try_into().unwrap(); let lon_path: Path = "/longitude".try_into().unwrap(); - session.add_array(time_path.clone(), zarr_meta.clone()).await?; - session.add_array(temp_path.clone(), zarr_meta.clone()).await?; - session.add_array(lat_path.clone(), zarr_meta.clone()).await?; - session.add_array(lon_path.clone(), zarr_meta.clone()).await?; + session + .add_array( + time_path.clone(), + shape.clone(), + dimension_names.clone(), + def.clone(), + ) + .await?; + session + .add_array( + temp_path.clone(), + shape.clone(), + dimension_names.clone(), + def.clone(), + ) + .await?; + session + .add_array( + lat_path.clone(), + shape.clone(), + dimension_names.clone(), + def.clone(), + ) + .await?; + session + .add_array( + lon_path.clone(), + shape.clone(), + dimension_names.clone(), + def.clone(), + ) + .await?; session .set_chunk_ref( @@ -1139,11 +1148,11 @@ mod tests { let ops = logging.fetch_operations(); let lat_manifest_id = match session.get_node(&lat_path).await?.node_data { - NodeData::Array(_, vec) => vec[0].object_id.to_string(), + NodeData::Array { manifests, .. } => manifests[0].object_id.to_string(), NodeData::Group => panic!(), }; let lon_manifest_id = match session.get_node(&lon_path).await?.node_data { - NodeData::Array(_, vec) => vec[0].object_id.to_string(), + NodeData::Array { manifests, .. 
} => manifests[0].object_id.to_string(), NodeData::Group => panic!(), }; assert_eq!(ops[0].0, "fetch_snapshot"); diff --git a/icechunk/src/session.rs b/icechunk/src/session.rs index f610bc29..88cba53a 100644 --- a/icechunk/src/session.rs +++ b/icechunk/src/session.rs @@ -21,7 +21,7 @@ use tracing::{debug, info, instrument, trace, warn, Instrument}; use crate::{ asset_manager::AssetManager, - change_set::ChangeSet, + change_set::{ArrayData, ChangeSet}, conflicts::{Conflict, ConflictResolution, ConflictSolver}, error::ICError, format::{ @@ -31,14 +31,13 @@ use crate::{ VirtualReferenceErrorKind, }, snapshot::{ - ManifestFileInfo, NodeData, NodeSnapshot, NodeType, Snapshot, - SnapshotProperties, UserAttributesSnapshot, ZarrArrayMetadata, + ArrayShape, DimensionName, ManifestFileInfo, NodeData, NodeSnapshot, + NodeType, Snapshot, SnapshotProperties, }, transaction_log::{Diff, DiffBuilder, TransactionLog}, ByteRange, ChunkIndices, ChunkOffset, IcechunkFormatError, IcechunkFormatErrorKind, ManifestId, NodeId, ObjectId, Path, SnapshotId, }, - metadata::UserAttributes, refs::{fetch_branch_tip, update_branch, RefError, RefErrorKind}, repository::{RepositoryError, RepositoryErrorKind}, storage::{self, StorageErrorKind}, @@ -272,12 +271,16 @@ impl Session { /// Add a group to the store. /// /// Calling this only records the operation in memory, doesn't have any consequence on the storage - #[instrument(skip(self))] - pub async fn add_group(&mut self, path: Path) -> SessionResult<()> { + #[instrument(skip(self, definition))] + pub async fn add_group( + &mut self, + path: Path, + definition: Bytes, + ) -> SessionResult<()> { match self.get_node(&path).await { Err(SessionError { kind: SessionErrorKind::NodeNotFound { .. }, .. }) => { let id = NodeId::random(); - self.change_set.add_group(path.clone(), id); + self.change_set.add_group(path.clone(), id, definition); Ok(()) } Ok(node) => Err(SessionErrorKind::AlreadyExists { @@ -295,9 +298,9 @@ impl Session { NodeSnapshot { node_data: NodeData::Group, path: node_path, .. } => { Ok(self.delete_group(node_path).await?) } - NodeSnapshot { node_data: NodeData::Array(..), path: node_path, .. } => { - Ok(self.delete_array(node_path).await?) - } + NodeSnapshot { + node_data: NodeData::Array { .. }, path: node_path, .. + } => Ok(self.delete_array(node_path).await?), } } /// Delete a group in the hierarchy @@ -332,16 +335,22 @@ impl Session { /// Add an array to the store. /// /// Calling this only records the operation in memory, doesn't have any consequence on the storage - #[instrument(skip(self, metadata))] + #[instrument(skip(self, user_data))] pub async fn add_array( &mut self, path: Path, - metadata: ZarrArrayMetadata, + shape: ArrayShape, + dimension_names: Option>, + user_data: Bytes, ) -> SessionResult<()> { match self.get_node(&path).await { Err(SessionError { kind: SessionErrorKind::NodeNotFound { .. }, .. 
}) => { let id = NodeId::random(); - self.change_set.add_array(path, id, metadata); + self.change_set.add_array( + path, + id, + ArrayData { shape, dimension_names, user_data }, + ); Ok(()) } Ok(node) => Err(SessionErrorKind::AlreadyExists { @@ -356,15 +365,35 @@ impl Session { // Updates an array Zarr metadata /// /// Calling this only records the operation in memory, doesn't have any consequence on the storage - #[instrument(skip(self, metadata))] + #[instrument(skip(self, user_data))] pub async fn update_array( &mut self, - path: Path, - metadata: ZarrArrayMetadata, + path: &Path, + shape: ArrayShape, + dimension_names: Option>, + user_data: Bytes, + ) -> SessionResult<()> { + self.get_array(path).await.map(|node| { + self.change_set.update_array( + &node.id, + path, + ArrayData { shape, dimension_names, user_data }, + ) + }) + } + + // Updates an group Zarr metadata + /// + /// Calling this only records the operation in memory, doesn't have any consequence on the storage + #[instrument(skip(self, definition))] + pub async fn update_group( + &mut self, + path: &Path, + definition: Bytes, ) -> SessionResult<()> { - self.get_array(&path) + self.get_group(path) .await - .map(|node| self.change_set.update_array(node.id, metadata)) + .map(|node| self.change_set.update_group(&node.id, path, definition)) } /// Delete an array in the hierarchy @@ -395,18 +424,6 @@ impl Session { Ok(()) } - /// Record the write or delete of user attributes to array or group - #[instrument(skip(self, atts))] - pub async fn set_user_attributes( - &mut self, - path: Path, - atts: Option, - ) -> SessionResult<()> { - let node = self.get_node(&path).await?; - self.change_set.update_user_attributes(node.id, atts); - Ok(()) - } - // Record the write, referencing or delete of a chunk // // Caller has to write the chunk before calling this. @@ -430,8 +447,8 @@ impl Session { coord: ChunkIndices, data: Option, ) -> SessionResult<()> { - if let NodeData::Array(zarr_metadata, _) = node.node_data { - if zarr_metadata.valid_chunk_coord(&coord) { + if let NodeData::Array { shape, .. } = node.node_data { + if shape.valid_chunk_coord(&coord) { self.change_set.set_chunk_ref(node.id, coord, data); Ok(()) } else { @@ -474,7 +491,7 @@ impl Session { pub async fn get_array(&self, path: &Path) -> SessionResult { match self.get_node(path).await { - res @ Ok(NodeSnapshot { node_data: NodeData::Array(..), .. }) => res, + res @ Ok(NodeSnapshot { node_data: NodeData::Array { .. }, .. }) => res, Ok(node @ NodeSnapshot { .. }) => Err(SessionErrorKind::NotAnArray { node, message: "getting an array".to_string(), @@ -525,7 +542,7 @@ impl Session { message: "getting chunk reference".to_string(), } .into()), - NodeData::Array(_, manifests) => { + NodeData::Array { manifests, .. } => { // check the chunks modified in this session first // TODO: I hate rust forces me to clone to search in a hashmap. How to do better? let session_chunk = @@ -1050,7 +1067,7 @@ async fn verified_node_chunk_iterator<'a>( ) -> impl Stream> + 'a { match node.node_data { NodeData::Group => futures::future::Either::Left(futures::stream::empty()), - NodeData::Array(_, manifests) => { + NodeData::Array { manifests, .. 
} => { let new_chunk_indices: Box> = Box::new( change_set .array_chunks_iterator(&node.id, &node.path) @@ -1244,25 +1261,31 @@ async fn get_existing_node( .into(), err => SessionError::from(err), })?; - let session_atts = change_set - .get_user_attributes(&node.id) - .cloned() - .map(|a| a.map(UserAttributesSnapshot::Inline)); - let res = NodeSnapshot { - user_attributes: session_atts.unwrap_or_else(|| node.user_attributes.clone()), - ..node.clone() - }; - if let Some(session_meta) = change_set.get_updated_zarr_metadata(&node.id).cloned() { - if let NodeData::Array(_, manifests) = res.node_data { - Ok(NodeSnapshot { - node_data: NodeData::Array(session_meta, manifests), - ..res - }) - } else { - Ok(res) + + match node.node_data { + NodeData::Array { ref manifests, .. } => { + if let Some(new_data) = change_set.get_updated_array(&node.id) { + let node_data = NodeData::Array { + shape: new_data.shape.clone(), + dimension_names: new_data.dimension_names.clone(), + manifests: manifests.clone(), + }; + Ok(NodeSnapshot { + user_data: new_data.user_data.clone(), + node_data, + ..node + }) + } else { + Ok(node) + } + } + NodeData::Group => { + if let Some(updated_definition) = change_set.get_updated_group(&node.id) { + Ok(NodeSnapshot { user_data: updated_definition.clone(), ..node }) + } else { + Ok(node) + } } - } else { - Ok(res) } } @@ -1428,7 +1451,7 @@ impl<'a> FlushProcess<'a> { /// Record the previous manifests for an array that was not modified in the session fn copy_previous_manifest(&mut self, node: &NodeSnapshot, old_snapshot: &Snapshot) { match &node.node_data { - NodeData::Array(_, array_refs) => { + NodeData::Array { manifests: array_refs, .. } => { self.manifest_files.extend(array_refs.iter().map(|mr| { // It's ok to unwrap here, the snapshot had the node, it has to have the // manifest file info @@ -1510,12 +1533,17 @@ async fn flush( .map_ok(|node| { let id = &node.id; // TODO: many clones - if let NodeData::Array(meta, _) = node.node_data { + if let NodeData::Array { shape, dimension_names, .. 
} = node.node_data { NodeSnapshot { - node_data: NodeData::Array( - meta.clone(), - flush_data.manifest_refs.get(id).cloned().unwrap_or_default(), - ), + node_data: NodeData::Array { + shape, + dimension_names, + manifests: flush_data + .manifest_refs + .get(id) + .cloned() + .unwrap_or_default(), + }, ..node } } else { @@ -1532,7 +1560,6 @@ async fn flush( message.to_string(), Some(properties), flush_data.manifest_files.into_iter().collect(), - vec![], all_nodes.into_iter().map(Ok::<_, Infallible>), )?; @@ -1712,7 +1739,7 @@ fn aggregate_extents<'a, T: std::fmt::Debug, E>( #[cfg(test)] #[allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)] mod tests { - use std::{collections::HashMap, error::Error, num::NonZeroU64}; + use std::{collections::HashMap, error::Error}; use crate::{ conflicts::{ @@ -1720,14 +1747,11 @@ mod tests { detector::ConflictDetector, }, format::manifest::ManifestExtents, - metadata::{ - ChunkKeyEncoding, ChunkShape, Codec, DataType, FillValue, StorageTransformer, - }, refs::{fetch_ref, Ref}, repository::VersionInfo, - storage::{logging::LoggingStorage, new_in_memory_storage}, + storage::new_in_memory_storage, strategies::{ - chunk_indices, empty_writable_session, node_paths, zarr_array_metadata, + chunk_indices, empty_writable_session, node_paths, shapes_and_dims, ShapeDim, }, ObjectStorage, Repository, }; @@ -1736,6 +1760,7 @@ mod tests { use itertools::Itertools; use pretty_assertions::assert_eq; use proptest::prelude::{prop_assert, prop_assert_eq}; + use storage::logging::LoggingStorage; use test_strategy::proptest; use tokio::sync::Barrier; @@ -1750,11 +1775,13 @@ mod tests { #[strategy(node_paths())] path: Path, #[strategy(empty_writable_session())] mut session: Session, ) { + let user_data = Bytes::new(); + // getting any path from an empty repository must fail prop_assert!(session.get_node(&path).await.is_err()); // adding a new group must succeed - prop_assert!(session.add_group(path.clone()).await.is_ok()); + prop_assert!(session.add_group(path.clone(), user_data.clone()).await.is_ok()); // Getting a group just added must succeed let node = session.get_node(&path).await; @@ -1765,7 +1792,7 @@ mod tests { // adding an existing group fails let matches = matches!( - session.add_group(path.clone()).await.unwrap_err(), + session.add_group(path.clone(), user_data.clone()).await.unwrap_err(), SessionError{kind: SessionErrorKind::AlreadyExists{node, ..},..} if node.path == path ); prop_assert!(matches); @@ -1780,7 +1807,7 @@ mod tests { prop_assert!(session.get_node(&path).await.is_err()); // adding again must succeed - prop_assert!(session.add_group(path.clone()).await.is_ok()); + prop_assert!(session.add_group(path.clone(), user_data.clone()).await.is_ok()); // deleting again must succeed prop_assert!(session.delete_group(path.clone()).await.is_ok()); @@ -1789,14 +1816,30 @@ mod tests { #[proptest(async = "tokio")] async fn test_add_delete_array( #[strategy(node_paths())] path: Path, - #[strategy(zarr_array_metadata())] metadata: ZarrArrayMetadata, + #[strategy(shapes_and_dims(None))] metadata: ShapeDim, #[strategy(empty_writable_session())] mut session: Session, ) { // new array must always succeed - prop_assert!(session.add_array(path.clone(), metadata.clone()).await.is_ok()); + prop_assert!(session + .add_array( + path.clone(), + metadata.shape.clone(), + metadata.dimension_names.clone(), + Bytes::new() + ) + .await + .is_ok()); // adding to the same path must fail - prop_assert!(session.add_array(path.clone(), metadata.clone()).await.is_err()); + 
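// --- Illustrative sketch; not part of the patch (assumes a writable `session`, as in
// these tests). Groups and arrays are now created with their raw metadata bytes
// (`user_data`); arrays additionally take an ArrayShape and optional dimension names.
// The JSON payloads are placeholders, not a format required by icechunk.
session.add_group(Path::root(), Bytes::from_static(br#"{"zarr_format":3}"#)).await?;
session
    .add_array(
        "/temperature".try_into().unwrap(),
        ArrayShape::new(vec![(1_000, 100)]).unwrap(),
        Some(vec!["t".into()]),
        Bytes::from_static(br#"{"data_type":"float32"}"#),
    )
    .await?;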
prop_assert!(session + .add_array( + path.clone(), + metadata.shape.clone(), + metadata.dimension_names.clone(), + Bytes::new() + ) + .await + .is_err()); // first delete must succeed prop_assert!(session.delete_array(path.clone()).await.is_ok()); @@ -1805,7 +1848,15 @@ mod tests { prop_assert!(session.delete_array(path.clone()).await.is_ok()); // adding again must succeed - prop_assert!(session.add_array(path.clone(), metadata.clone()).await.is_ok()); + prop_assert!(session + .add_array( + path.clone(), + metadata.shape.clone(), + metadata.dimension_names.clone(), + Bytes::new() + ) + .await + .is_ok()); // deleting again must succeed prop_assert!(session.delete_array(path.clone()).await.is_ok()); @@ -1814,13 +1865,21 @@ mod tests { #[proptest(async = "tokio")] async fn test_add_array_group_clash( #[strategy(node_paths())] path: Path, - #[strategy(zarr_array_metadata())] metadata: ZarrArrayMetadata, + #[strategy(shapes_and_dims(None))] metadata: ShapeDim, #[strategy(empty_writable_session())] mut session: Session, ) { // adding a group at an existing array node must fail - prop_assert!(session.add_array(path.clone(), metadata.clone()).await.is_ok()); + prop_assert!(session + .add_array( + path.clone(), + metadata.shape.clone(), + metadata.dimension_names.clone(), + Bytes::new() + ) + .await + .is_ok()); let matches = matches!( - session.add_group(path.clone()).await.unwrap_err(), + session.add_group(path.clone(), Bytes::new()).await.unwrap_err(), SessionError{kind: SessionErrorKind::AlreadyExists{node, ..},..} if node.path == path ); prop_assert!(matches); @@ -1833,9 +1892,13 @@ mod tests { prop_assert!(session.delete_array(path.clone()).await.is_ok()); // adding an array at an existing group node must fail - prop_assert!(session.add_group(path.clone()).await.is_ok()); + prop_assert!(session.add_group(path.clone(), Bytes::new()).await.is_ok()); let matches = matches!( - session.add_array(path.clone(), metadata.clone()).await.unwrap_err(), + session.add_array(path.clone(), + metadata.shape.clone(), + metadata.dimension_names.clone(), + Bytes::new() + ).await.unwrap_err(), SessionError{kind: SessionErrorKind::AlreadyExists{node, ..},..} if node.path == path ); prop_assert!(matches); @@ -1909,47 +1972,35 @@ mod tests { let manifest_id = manifest.id(); let manifest_size = asset_manager.write_manifest(Arc::clone(&manifest)).await?; - let zarr_meta1 = ZarrArrayMetadata { - shape: vec![2, 2, 2], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![ - Some("x".to_string()), - Some("y".to_string()), - Some("t".to_string()), - ]), - }; + let shape = ArrayShape::new(vec![(2, 1), (2, 1), (2, 1)]).unwrap(); + let dimension_names = Some(vec!["x".into(), "y".into(), "t".into()]); + let manifest_ref = ManifestRef { object_id: manifest_id.clone(), extents: ManifestExtents::new(&[0, 0, 0], &[1, 1, 2]), }; + + let group_def = Bytes::from_static(br#"{"some":"group"}"#); + let array_def = Bytes::from_static(br#"{"this":"array"}"#); + let array1_path: Path = "/array1".try_into().unwrap(); let node_id = NodeId::random(); let nodes = vec![ NodeSnapshot { path: Path::root(), id: node_id, - 
user_attributes: None, node_data: NodeData::Group, + user_data: group_def.clone(), }, NodeSnapshot { path: array1_path.clone(), id: array_id.clone(), - user_attributes: Some(UserAttributesSnapshot::Inline( - UserAttributes::try_new(br#"{"foo":1}"#).unwrap(), - )), - node_data: NodeData::Array(zarr_meta1.clone(), vec![manifest_ref]), + node_data: NodeData::Array { + shape: shape.clone(), + dimension_names: dimension_names.clone(), + manifests: vec![manifest_ref.clone()], + }, + user_data: array_def.clone(), }, ]; @@ -1961,7 +2012,6 @@ mod tests { "message".to_string(), None, manifests, - vec![], nodes.iter().cloned().map(Ok::), )?); asset_manager.write_snapshot(Arc::clone(&snapshot)).await?; @@ -1985,61 +2035,75 @@ mod tests { assert_eq!(nodes.get(1).unwrap(), &node); let group_name = "/tbd-group".to_string(); - ds.add_group(group_name.clone().try_into().unwrap()).await?; + ds.add_group( + group_name.clone().try_into().unwrap(), + Bytes::copy_from_slice(b"somedef"), + ) + .await?; ds.delete_group(group_name.clone().try_into().unwrap()).await?; // deleting non-existing is no-op assert!(ds.delete_group(group_name.clone().try_into().unwrap()).await.is_ok()); assert!(ds.get_node(&group_name.try_into().unwrap()).await.is_err()); // add a new array and retrieve its node - ds.add_group("/group".try_into().unwrap()).await?; - - let zarr_meta2 = ZarrArrayMetadata { - shape: vec![3], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![NonZeroU64::new(2).unwrap()]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![Some("t".to_string())]), - }; + ds.add_group("/group".try_into().unwrap(), Bytes::copy_from_slice(b"somedef2")) + .await?; + + let shape2 = ArrayShape::new(vec![(2, 2)]).unwrap(); + let dimension_names2 = Some(vec!["t".into()]); + + let array_def2 = Bytes::from_static(br#"{"this":"other array"}"#); let new_array_path: Path = "/group/array2".to_string().try_into().unwrap(); - ds.add_array(new_array_path.clone(), zarr_meta2.clone()).await?; + ds.add_array( + new_array_path.clone(), + shape2.clone(), + dimension_names2.clone(), + array_def2.clone(), + ) + .await?; ds.delete_array(new_array_path.clone()).await?; // Delete a non-existent array is no-op assert!(ds.delete_array(new_array_path.clone()).await.is_ok()); assert!(ds.get_node(&new_array_path.clone()).await.is_err()); - ds.add_array(new_array_path.clone(), zarr_meta2.clone()).await?; + ds.add_array( + new_array_path.clone(), + shape2.clone(), + dimension_names2.clone(), + array_def2.clone(), + ) + .await?; let node = ds.get_node(&new_array_path).await; assert!(matches!( node.ok(), - Some(NodeSnapshot {path, user_attributes, node_data,..}) - if path== new_array_path.clone() && user_attributes.is_none() && node_data == NodeData::Array(zarr_meta2.clone(), vec![]) + Some(NodeSnapshot {path,node_data, user_data, .. 
}) + if path== new_array_path.clone() && + user_data == array_def2 && + node_data == NodeData::Array{shape:shape2, dimension_names:dimension_names2, manifests: vec![]} )); - // set user attributes for the new array and retrieve them - ds.set_user_attributes( - new_array_path.clone(), - Some(UserAttributes::try_new(br#"{"n":42}"#).unwrap()), + // update the array definition + let shape3 = ArrayShape::new(vec![(4, 3)]).unwrap(); + let dimension_names3 = Some(vec!["tt".into()]); + + let array_def3 = Bytes::from_static(br#"{"this":"yet other array"}"#); + ds.update_array( + &new_array_path.clone(), + shape3.clone(), + dimension_names3.clone(), + array_def3.clone(), ) .await?; let node = ds.get_node(&new_array_path).await; assert!(matches!( node.ok(), - Some(NodeSnapshot {path, user_attributes, node_data, ..}) + Some(NodeSnapshot {path,node_data, user_data, .. }) if path == "/group/array2".try_into().unwrap() && - user_attributes == Some(UserAttributesSnapshot::Inline( - UserAttributes::try_new(br#"{"n":42}"#).unwrap() - )) && - node_data == NodeData::Array(zarr_meta2.clone(), vec![]) + user_data == array_def3 && + node_data == NodeData::Array { shape:shape3, dimension_names: dimension_names3, manifests: vec![] } )); let payload = ds.get_chunk_writer()(Bytes::copy_from_slice(b"foo")).await?; @@ -2053,30 +2117,21 @@ mod tests { let non_chunk = ds.get_chunk_ref(&new_array_path, &ChunkIndices(vec![1])).await?; assert_eq!(non_chunk, None); - // update old array use attributes and check them - ds.set_user_attributes( - array1_path.clone(), - Some(UserAttributes::try_new(br#"{"updated": true}"#).unwrap()), + // update old array zarr metadata and check it + let shape3 = ArrayShape::new(vec![(8, 3)]).unwrap(); + let dimension_names3 = Some(vec!["tt".into()]); + + let array_def3 = Bytes::from_static(br#"{"this":"more arrays"}"#); + ds.update_array( + &array1_path.clone(), + shape3.clone(), + dimension_names3.clone(), + array_def3.clone(), ) .await?; - let node = ds.get_node(&array1_path).await.unwrap(); - assert_eq!( - node.user_attributes, - Some(UserAttributesSnapshot::Inline( - UserAttributes::try_new(br#"{"updated": true}"#).unwrap() - )) - ); - - // update old array zarr metadata and check it - let new_zarr_meta1 = ZarrArrayMetadata { shape: vec![2, 2, 3], ..zarr_meta1 }; - ds.update_array(array1_path.clone(), new_zarr_meta1).await?; let node = ds.get_node(&array1_path).await; - if let Ok(NodeSnapshot { - node_data: NodeData::Array(ZarrArrayMetadata { shape, .. }, _), - .. - }) = &node - { - assert_eq!(shape, &vec![2, 2, 3]); + if let Ok(NodeSnapshot { node_data: NodeData::Array { shape, .. }, .. 
}) = &node { + assert_eq!(shape, &shape3); } else { panic!("Failed to update zarr metadata"); } @@ -2098,13 +2153,6 @@ mod tests { ) .await?; assert_eq!(chunk, Some(data)); - - let path: Path = "/group/array2".try_into().unwrap(); - let node = ds.get_node(&path).await; - assert!(ds.change_set.has_updated_attributes(&node.as_ref().unwrap().id)); - assert!(ds.delete_array(path.clone()).await.is_ok()); - assert!(!ds.change_set.has_updated_attributes(&node?.id)); - Ok(()) } @@ -2125,8 +2173,9 @@ mod tests { let diff = ds.status().await?; assert!(diff.is_empty()); + let user_data = Bytes::copy_from_slice(b"foo"); // add a new array and retrieve its node - ds.add_group(Path::root()).await?; + ds.add_group(Path::root(), user_data.clone()).await?; let diff = ds.status().await?; assert!(!diff.is_empty()); assert_eq!(diff.new_groups, [Path::root()].into()); @@ -2141,47 +2190,40 @@ mod tests { assert_eq!(first_commit, ds.snapshot_id); assert!(matches!( ds.get_node(&Path::root()).await.ok(), - Some(NodeSnapshot { path, user_attributes, node_data, .. }) - if path == Path::root() && user_attributes.is_none() && node_data == NodeData::Group + Some(NodeSnapshot { path, user_data: actual_user_data, node_data, .. }) + if path == Path::root() && user_data == actual_user_data && node_data == NodeData::Group )); - ds.add_group("/group".try_into().unwrap()).await?; + let user_data2 = Bytes::copy_from_slice(b"bar"); + ds.add_group("/group".try_into().unwrap(), user_data2.clone()).await?; let _snapshot_id = ds.commit("commit", Some(SnapshotProperties::default())).await?; let mut ds = repository.writable_session("main").await?; assert!(matches!( ds.get_node(&Path::root()).await.ok(), - Some(NodeSnapshot { path, user_attributes, node_data, .. }) - if path == Path::root() && user_attributes.is_none() && node_data == NodeData::Group + Some(NodeSnapshot { path, user_data: actual_user_data, node_data, .. }) + if path == Path::root() && user_data==actual_user_data && node_data == NodeData::Group )); assert!(matches!( ds.get_node(&"/group".try_into().unwrap()).await.ok(), - Some(NodeSnapshot { path, user_attributes, node_data, .. }) - if path == "/group".try_into().unwrap() && user_attributes.is_none() && node_data == NodeData::Group + Some(NodeSnapshot { path, user_data:actual_user_data, node_data, .. 
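// --- Illustrative sketch; not part of the patch (assumes a writable `ds` session, as in
// this test). With set_user_attributes gone, array and group metadata are replaced
// wholesale: update_array takes the new shape, dimension names and user_data bytes,
// and update_group takes the new definition bytes; get_node then reflects them.
let array_path: Path = "/group/array2".try_into().unwrap();
ds.update_array(
    &array_path,
    ArrayShape::new(vec![(8, 3)]).unwrap(),
    Some(vec!["tt".into()]),
    Bytes::from_static(br#"{"this":"replacement metadata"}"#),
)
.await?;

let group_path: Path = "/group".try_into().unwrap();
ds.update_group(&group_path, Bytes::from_static(br#"{"attributes":{}}"#)).await?;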
}) + if path == "/group".try_into().unwrap() && user_data2 == actual_user_data && node_data == NodeData::Group )); - let zarr_meta = ZarrArrayMetadata { - shape: vec![1, 1, 2], - data_type: DataType::Float16, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(1).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Float16(f32::NEG_INFINITY), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![Some("t".to_string())]), - }; + let shape = ArrayShape::new([(1, 1), (1, 1), (2, 1)]).unwrap(); + let dimension_names = Some(vec!["x".into(), "y".into(), "z".into()]); + let array_user_data = Bytes::copy_from_slice(b"array"); let new_array_path: Path = "/group/array1".try_into().unwrap(); - ds.add_array(new_array_path.clone(), zarr_meta.clone()).await?; + ds.add_array( + new_array_path.clone(), + shape.clone(), + dimension_names.clone(), + array_user_data.clone(), + ) + .await?; let diff = ds.status().await?; assert!(!diff.is_empty()); @@ -2194,7 +2236,13 @@ mod tests { let mut ds = repository.writable_session("main").await?; let new_new_array_path: Path = "/group/array2".try_into().unwrap(); - ds.add_array(new_new_array_path.clone(), zarr_meta.clone()).await?; + ds.add_array( + new_new_array_path.clone(), + shape.clone(), + dimension_names.clone(), + array_user_data.clone(), + ) + .await?; assert!(ds.has_uncommitted_changes()); let changes = ds.discard_changes(); @@ -2222,22 +2270,26 @@ mod tests { let mut ds = repository.writable_session("main").await?; assert!(matches!( ds.get_node(&Path::root()).await.ok(), - Some(NodeSnapshot { path, user_attributes, node_data, .. }) - if path == Path::root() && user_attributes.is_none() && node_data == NodeData::Group + Some(NodeSnapshot { path, user_data: actual_user_data, node_data, .. }) + if path == Path::root() && user_data == actual_user_data && node_data == NodeData::Group )); assert!(matches!( ds.get_node(&"/group".try_into().unwrap()).await.ok(), - Some(NodeSnapshot { path, user_attributes, node_data, .. }) - if path == "/group".try_into().unwrap() && user_attributes.is_none() && node_data == NodeData::Group + Some(NodeSnapshot { path, user_data: actual_user_data, node_data, .. }) + if path == "/group".try_into().unwrap() && user_data2 == actual_user_data && node_data == NodeData::Group )); assert!(matches!( ds.get_node(&new_array_path).await.ok(), Some(NodeSnapshot { path, - user_attributes: None, - node_data: NodeData::Array(meta, manifests), + user_data: actual_user_data, + node_data: NodeData::Array{ shape: actual_shape, dimension_names: actual_dim, manifests }, .. - }) if path == new_array_path && meta == zarr_meta.clone() && manifests.len() == 1 + }) if path == new_array_path + && actual_user_data == array_user_data + && actual_shape == shape + && actual_dim == dimension_names + && manifests.len() == 1 )); assert_eq!( ds.get_chunk_ref(&new_array_path, &ChunkIndices(vec![0, 0, 0])).await?, @@ -2267,7 +2319,7 @@ mod tests { let snap = repository.asset_manager().fetch_snapshot(&previous_snapshot_id).await?; match &snap.get_node(&new_array_path)?.node_data { - NodeData::Array(_, manifests) => { + NodeData::Array { manifests, .. 
} => { assert_eq!( manifests.first().unwrap().extents, ManifestExtents::new(&[0, 0, 0], &[1, 1, 2]) @@ -2291,14 +2343,15 @@ mod tests { ds.set_chunk_ref(new_array_path.clone(), ChunkIndices(vec![0, 0, 1]), None) .await?; - let new_meta = ZarrArrayMetadata { shape: vec![1, 1, 1], ..zarr_meta }; + let new_shape = ArrayShape::new([(1, 1), (1, 1), (1, 1)]).unwrap(); + let new_dimension_names = Some(vec!["X".into(), "X".into(), "Z".into()]); + let new_user_data = Bytes::copy_from_slice(b"new data"); // we change zarr metadata - ds.update_array(new_array_path.clone(), new_meta.clone()).await?; - - // we change user attributes metadata - ds.set_user_attributes( - new_array_path.clone(), - Some(UserAttributes::try_new(br#"{"foo":42}"#).unwrap()), + ds.update_array( + &new_array_path.clone(), + new_shape.clone(), + new_dimension_names.clone(), + new_user_data.clone(), ) .await?; @@ -2306,7 +2359,7 @@ mod tests { let snap = repository.asset_manager().fetch_snapshot(&snapshot_id).await?; match &snap.get_node(&new_array_path)?.node_data { - NodeData::Array(_, manifests) => { + NodeData::Array { manifests, .. } => { assert_eq!( manifests.first().unwrap().extents, ManifestExtents::new(&[0, 0, 0], &[1, 1, 1]) @@ -2333,12 +2386,14 @@ mod tests { ds.get_node(&new_array_path).await.ok(), Some(NodeSnapshot { path, - user_attributes: Some(atts), - node_data: NodeData::Array(meta, manifests), + user_data: actual_user_data, + node_data: NodeData::Array{shape: actual_shape, dimension_names: actual_dims, manifests}, .. - }) if path == new_array_path && meta == new_meta.clone() && + }) if path == new_array_path && + actual_user_data == new_user_data && manifests.len() == 1 && - atts == UserAttributesSnapshot::Inline(UserAttributes::try_new(br#"{"foo":42}"#).unwrap()) + actual_shape == new_shape && + actual_dims == new_dimension_names )); // since we wrote every asset we should only have one fetch for the initial snapshot @@ -2385,8 +2440,8 @@ mod tests { )] .into() ); - assert_eq!(&diff.updated_user_attributes, &[new_array_path.clone()].into()); - assert_eq!(&diff.updated_zarr_metadata, &[new_array_path.clone()].into()); + assert_eq!(&diff.updated_arrays, &[new_array_path.clone()].into()); + assert_eq!(&diff.updated_groups, &[].into()); let diff = repository .diff( @@ -2411,8 +2466,8 @@ mod tests { )] .into() ); - assert_eq!(&diff.updated_user_attributes, &[new_array_path.clone()].into()); - assert_eq!(&diff.updated_zarr_metadata, &[new_array_path.clone()].into()); + assert_eq!(&diff.updated_arrays, &[new_array_path.clone()].into()); + assert_eq!(&diff.updated_groups, &[].into()); Ok(()) } @@ -2420,8 +2475,8 @@ mod tests { async fn test_basic_delete_and_flush() -> Result<(), Box> { let repository = create_memory_store_repository().await; let mut ds = repository.writable_session("main").await?; - ds.add_group(Path::root()).await?; - ds.add_group("/1".try_into().unwrap()).await?; + ds.add_group(Path::root(), Bytes::copy_from_slice(b"")).await?; + ds.add_group("/1".try_into().unwrap(), Bytes::copy_from_slice(b"")).await?; ds.delete_group("/1".try_into().unwrap()).await?; assert_eq!(ds.list_nodes().await?.count(), 1); ds.commit("commit", None).await?; @@ -2439,8 +2494,8 @@ mod tests { async fn test_basic_delete_after_flush() -> Result<(), Box> { let repository = create_memory_store_repository().await; let mut ds = repository.writable_session("main").await?; - ds.add_group(Path::root()).await?; - ds.add_group("/1".try_into().unwrap()).await?; + ds.add_group(Path::root(), Bytes::copy_from_slice(b"")).await?; + 
ds.add_group("/1".try_into().unwrap(), Bytes::copy_from_slice(b"")).await?; ds.commit("commit", None).await?; let mut ds = repository.writable_session("main").await?; @@ -2455,7 +2510,7 @@ mod tests { async fn test_commit_after_deleting_old_node() -> Result<(), Box> { let repository = create_memory_store_repository().await; let mut ds = repository.writable_session("main").await?; - ds.add_group(Path::root()).await?; + ds.add_group(Path::root(), Bytes::copy_from_slice(b"")).await?; ds.commit("commit", None).await?; let mut ds = repository.writable_session("main").await?; @@ -2471,15 +2526,16 @@ mod tests { #[tokio::test] async fn test_delete_children() -> Result<(), Box> { + let def = Bytes::copy_from_slice(b""); let repository = create_memory_store_repository().await; let mut ds = repository.writable_session("main").await?; - ds.add_group(Path::root()).await?; + ds.add_group(Path::root(), def.clone()).await?; ds.commit("initialize", None).await?; let mut ds = repository.writable_session("main").await?; - ds.add_group("/a".try_into().unwrap()).await?; - ds.add_group("/b".try_into().unwrap()).await?; - ds.add_group("/b/bb".try_into().unwrap()).await?; + ds.add_group("/a".try_into().unwrap(), def.clone()).await?; + ds.add_group("/b".try_into().unwrap(), def.clone()).await?; + ds.add_group("/b/bb".try_into().unwrap(), def.clone()).await?; ds.delete_group("/b".try_into().unwrap()).await?; assert!(ds.get_group(&"/b".try_into().unwrap()).await.is_err()); @@ -2500,10 +2556,11 @@ mod tests { async fn test_delete_children_of_old_nodes() -> Result<(), Box> { let repository = create_memory_store_repository().await; let mut ds = repository.writable_session("main").await?; - ds.add_group(Path::root()).await?; - ds.add_group("/a".try_into().unwrap()).await?; - ds.add_group("/b".try_into().unwrap()).await?; - ds.add_group("/b/bb".try_into().unwrap()).await?; + let def = Bytes::copy_from_slice(b""); + ds.add_group(Path::root(), def.clone()).await?; + ds.add_group("/a".try_into().unwrap(), def.clone()).await?; + ds.add_group("/b".try_into().unwrap(), def.clone()).await?; + ds.add_group("/b/bb".try_into().unwrap(), def.clone()).await?; ds.commit("commit", None).await?; let mut ds = repository.writable_session("main").await?; @@ -2518,29 +2575,22 @@ mod tests { let storage: Arc = new_in_memory_storage().await?; let repo = Repository::create(None, storage, HashMap::new()).await?; let mut ds = repo.writable_session("main").await?; + let def = Bytes::copy_from_slice(b""); // add a new array and retrieve its node - ds.add_group(Path::root()).await?; - let zarr_meta = ZarrArrayMetadata { - shape: vec![4, 2, 4], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(2).unwrap(), - NonZeroU64::new(1).unwrap(), - NonZeroU64::new(2).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![Some("t".to_string())]), - }; + ds.add_group(Path::root(), def.clone()).await?; + + let shape = ArrayShape::new(vec![(4, 2), (2, 1), (4, 2)]).unwrap(); + let dimension_names = Some(vec!["t".into()]); let new_array_path: Path = "/array".try_into().unwrap(); - ds.add_array(new_array_path.clone(), zarr_meta.clone()).await?; + ds.add_array( + new_array_path.clone(), + shape.clone(), + dimension_names.clone(), + def.clone(), + ) + .await?; // we 3 
chunks ds.set_chunk_ref( new_array_path.clone(), @@ -2614,29 +2664,19 @@ mod tests { ); let mut ds = repo.writable_session("main").await?; - ds.add_group(Path::root()).await?; - let zarr_meta = ZarrArrayMetadata { - shape: vec![5, 5], - data_type: DataType::Float16, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(2).unwrap(), - NonZeroU64::new(2).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Float16(f32::NEG_INFINITY), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![Some("t".to_string())]), - }; + let def = Bytes::copy_from_slice(b""); + let shape = ArrayShape::new(vec![(5, 2), (5, 1)]).unwrap(); + let dimension_names = Some(vec!["t".into()]); + + ds.add_group(Path::root(), def.clone()).await?; let a1path: Path = "/array1".try_into()?; let a2path: Path = "/array2".try_into()?; - ds.add_array(a1path.clone(), zarr_meta.clone()).await?; - ds.add_array(a2path.clone(), zarr_meta.clone()).await?; + ds.add_array(a1path.clone(), shape.clone(), dimension_names.clone(), def.clone()) + .await?; + ds.add_array(a2path.clone(), shape.clone(), dimension_names.clone(), def.clone()) + .await?; let _ = ds.commit("first commit", None).await?; @@ -2698,7 +2738,7 @@ mod tests { ); let manifest_id = match ds.get_array(&a1path).await?.node_data { - NodeData::Array(_, manifests) => { + NodeData::Array { manifests, .. } => { manifests.first().as_ref().unwrap().object_id.clone() } NodeData::Group => panic!("must be an array"), @@ -2726,7 +2766,7 @@ mod tests { ); let manifest_id = match ds.get_array(&a1path).await?.node_data { - NodeData::Array(_, manifests) => { + NodeData::Array { manifests, .. } => { manifests.first().as_ref().unwrap().object_id.clone() } NodeData::Group => panic!("must be an array"), @@ -2765,7 +2805,7 @@ mod tests { ); let manifest_id = match ds.get_array(&a1path).await?.node_data { - NodeData::Array(_, manifests) => { + NodeData::Array { manifests, .. } => { manifests.first().as_ref().unwrap().object_id.clone() } NodeData::Group => panic!("must be an array"), @@ -2781,7 +2821,7 @@ mod tests { let _snap_id = ds.commit("chunk deleted", None).await?; let manifests = match ds.get_array(&a1path).await?.node_data { - NodeData::Array(_, manifests) => manifests, + NodeData::Array { manifests, .. 
} => manifests, NodeData::Group => panic!("must be an array"), }; assert!(manifests.is_empty()); @@ -2817,8 +2857,10 @@ mod tests { let storage_settings = storage.default_settings(); let mut ds = repo.writable_session("main").await?; + let def = Bytes::copy_from_slice(b""); + // add a new array and retrieve its node - ds.add_group(Path::root()).await?; + ds.add_group(Path::root(), def.clone()).await?; let new_snapshot_id = ds.commit("first commit", None).await?; assert_eq!( new_snapshot_id, @@ -2834,37 +2876,29 @@ mod tests { assert!(matches!( ds.get_node(&Path::root()).await.ok(), - Some(NodeSnapshot { path, user_attributes, node_data, ..}) - if path == Path::root() && user_attributes.is_none() && node_data == NodeData::Group + Some(NodeSnapshot { node_data, path, ..}) + if path == Path::root() && node_data == NodeData::Group )); let mut ds = repo.writable_session("main").await?; assert!(matches!( ds.get_node(&Path::root()).await.ok(), - Some(NodeSnapshot { path, user_attributes, node_data, ..}) - if path == Path::root() && user_attributes.is_none() && node_data == NodeData::Group + Some(NodeSnapshot { path, node_data, ..}) + if path == Path::root() && node_data == NodeData::Group )); - let zarr_meta = ZarrArrayMetadata { - shape: vec![1, 1, 2], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(2).unwrap(), - NonZeroU64::new(2).unwrap(), - NonZeroU64::new(2).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![Some("t".to_string())]), - }; + + let shape = ArrayShape::new(vec![(1, 1), (2, 1), (4, 2)]).unwrap(); + let dimension_names = Some(vec!["t".into()]); let new_array_path: Path = "/array1".try_into().unwrap(); - ds.add_array(new_array_path.clone(), zarr_meta.clone()).await?; + ds.add_array( + new_array_path.clone(), + shape.clone(), + dimension_names.clone(), + def.clone(), + ) + .await?; ds.set_chunk_ref( new_array_path.clone(), ChunkIndices(vec![0, 0, 0]), @@ -2900,8 +2934,9 @@ mod tests { let mut ds1 = repository.writable_session("main").await?; let mut ds2 = repository.writable_session("main").await?; - ds1.add_group("/a".try_into().unwrap()).await?; - ds2.add_group("/b".try_into().unwrap()).await?; + let def = Bytes::copy_from_slice(b""); + ds1.add_group("/a".try_into().unwrap(), def.clone()).await?; + ds2.add_group("/b".try_into().unwrap(), def.clone()).await?; let barrier = Arc::new(Barrier::new(2)); let barrier_c = Arc::clone(&barrier); @@ -2963,24 +2998,12 @@ mod tests { let repo = Repository::create(None, Arc::clone(&storage), HashMap::new()).await?; let mut ds = repo.writable_session("main").await?; - ds.add_group(Path::root()).await?; - let zarr_meta = ZarrArrayMetadata { - shape: vec![5, 5], - data_type: DataType::Float16, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(2).unwrap(), - NonZeroU64::new(2).unwrap(), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Float16(f32::NEG_INFINITY), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: None, - dimension_names: None, - }; + let shape = ArrayShape::new(vec![(5, 2), (5, 2)]).unwrap(); + ds.add_group(Path::root(), Bytes::new()).await?; let apath: Path = "/array1".try_into()?; - ds.add_array(apath.clone(), zarr_meta.clone()).await?; + 
ds.add_array(apath.clone(), shape, None, Bytes::new()).await?; ds.commit("first commit", None).await?; @@ -3035,8 +3058,14 @@ mod tests { let repository = create_memory_store_repository().await; let mut ds = repository.writable_session("main").await?; - ds.add_group("/foo/bar".try_into().unwrap()).await?; - ds.add_array("/foo/bar/some-array".try_into().unwrap(), basic_meta()).await?; + ds.add_group("/foo/bar".try_into().unwrap(), Bytes::new()).await?; + ds.add_array( + "/foo/bar/some-array".try_into().unwrap(), + basic_shape(), + None, + Bytes::new(), + ) + .await?; ds.commit("create directory", None).await?; Ok(repository) @@ -3050,17 +3079,12 @@ mod tests { Ok((ds, ds2)) } - fn basic_meta() -> ZarrArrayMetadata { - ZarrArrayMetadata { - shape: vec![5], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![NonZeroU64::new(1).unwrap()]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![], - storage_transformers: None, - dimension_names: None, - } + fn basic_shape() -> ArrayShape { + ArrayShape::new(vec![(5, 1)]).unwrap() + } + + fn user_data() -> Bytes { + Bytes::new() } fn assert_has_conflict(conflict: &Conflict, rebase_result: SessionResult<()>) { @@ -3085,10 +3109,10 @@ mod tests { let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; let conflict_path: Path = "/foo/bar/conflict".try_into().unwrap(); - ds1.add_group(conflict_path.clone()).await?; + ds1.add_group(conflict_path.clone(), user_data()).await?; ds1.commit("create group", None).await?; - ds2.add_array(conflict_path.clone(), basic_meta()).await?; + ds2.add_array(conflict_path.clone(), basic_shape(), None, user_data()).await?; ds2.commit("create array", None).await.unwrap_err(); assert_has_conflict( &Conflict::NewNodeConflictsWithExistingNode(conflict_path), @@ -3107,11 +3131,11 @@ mod tests { let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; let conflict_path: Path = "/foo/bar/conflict".try_into().unwrap(); - ds1.add_array(conflict_path.clone(), basic_meta()).await?; + ds1.add_array(conflict_path.clone(), basic_shape(), None, user_data()).await?; ds1.commit("create array", None).await?; let inner_path: Path = "/foo/bar/conflict/inner".try_into().unwrap(); - ds2.add_array(inner_path.clone(), basic_meta()).await?; + ds2.add_array(inner_path.clone(), basic_shape(), None, user_data()).await?; ds2.commit("create inner array", None).await.unwrap_err(); assert_has_conflict( &Conflict::NewNodeInInvalidGroup(conflict_path), @@ -3130,10 +3154,10 @@ mod tests { let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; let path: Path = "/foo/bar/some-array".try_into().unwrap(); - ds1.update_array(path.clone(), basic_meta()).await?; + ds1.update_array(&path.clone(), basic_shape(), None, user_data()).await?; ds1.commit("update array", None).await?; - ds2.update_array(path.clone(), basic_meta()).await?; + ds2.update_array(&path.clone(), basic_shape(), None, user_data()).await?; ds2.commit("update array again", None).await.unwrap_err(); assert_has_conflict( &Conflict::ZarrMetadataDoubleUpdate(path), @@ -3155,7 +3179,7 @@ mod tests { ds1.delete_array(path.clone()).await?; ds1.commit("delete array", None).await?; - ds2.update_array(path.clone(), basic_meta()).await?; + ds2.update_array(&path.clone(), basic_shape(), None, user_data()).await?; ds2.commit("update array again", None).await.unwrap_err(); assert_has_conflict( &Conflict::ZarrMetadataUpdateOfDeletedArray(path), @@ -3164,63 +3188,6 @@ mod tests { Ok(()) } - #[tokio::test()] - /// Test conflict detection - /// - 
/// This session: uptade user attributes - /// Previous commit: update user attributes - async fn test_conflict_detection_double_user_atts_edit() -> Result<(), Box> - { - let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; - - let path: Path = "/foo/bar/some-array".try_into().unwrap(); - ds1.set_user_attributes( - path.clone(), - Some(UserAttributes::try_new(br#"{"foo":"bar"}"#).unwrap()), - ) - .await?; - ds1.commit("update array", None).await?; - - ds2.set_user_attributes( - path.clone(), - Some(UserAttributes::try_new(br#"{"foo":"bar"}"#).unwrap()), - ) - .await?; - ds2.commit("update array user atts", None).await.unwrap_err(); - let node_id = ds2.get_array(&path).await?.id; - assert_has_conflict( - &Conflict::UserAttributesDoubleUpdate { path, node_id }, - ds2.rebase(&ConflictDetector).await, - ); - Ok(()) - } - - #[tokio::test()] - /// Test conflict detection - /// - /// This session: uptade user attributes - /// Previous commit: delete same array - async fn test_conflict_detection_user_atts_edit_of_deleted( - ) -> Result<(), Box> { - let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; - - let path: Path = "/foo/bar/some-array".try_into().unwrap(); - ds1.delete_array(path.clone()).await?; - ds1.commit("delete array", None).await?; - - ds2.set_user_attributes( - path.clone(), - Some(UserAttributes::try_new(br#"{"foo":"bar"}"#).unwrap()), - ) - .await?; - ds2.commit("update array user atts", None).await.unwrap_err(); - assert_has_conflict( - &Conflict::UserAttributesUpdateOfDeletedNode(path), - ds2.rebase(&ConflictDetector).await, - ); - Ok(()) - } - #[tokio::test()] /// Test conflict detection /// @@ -3231,7 +3198,7 @@ mod tests { let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; let path: Path = "/foo/bar/some-array".try_into().unwrap(); - ds1.update_array(path.clone(), basic_meta()).await?; + ds1.update_array(&path.clone(), basic_shape(), None, user_data()).await?; ds1.commit("update array", None).await?; let node = ds2.get_node(&path).await.unwrap(); @@ -3244,33 +3211,6 @@ mod tests { Ok(()) } - #[tokio::test()] - /// Test conflict detection - /// - /// This session: delete array - /// Previous commit: update same array user attributes - async fn test_conflict_detection_delete_when_array_user_atts_updated( - ) -> Result<(), Box> { - let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; - - let path: Path = "/foo/bar/some-array".try_into().unwrap(); - ds1.set_user_attributes( - path.clone(), - Some(UserAttributes::try_new(br#"{"foo":"bar"}"#).unwrap()), - ) - .await?; - ds1.commit("update user attributes", None).await?; - - let node = ds2.get_node(&path).await.unwrap(); - ds2.delete_array(path.clone()).await?; - ds2.commit("delete array", None).await.unwrap_err(); - assert_has_conflict( - &Conflict::DeleteOfUpdatedArray { path, node_id: node.id }, - ds2.rebase(&ConflictDetector).await, - ); - Ok(()) - } - #[tokio::test()] /// Test conflict detection /// @@ -3304,16 +3244,12 @@ mod tests { /// /// This session: delete group /// Previous commit: update same group user attributes - async fn test_conflict_detection_delete_when_group_user_atts_updated( + async fn test_conflict_detection_delete_when_group_user_data_updated( ) -> Result<(), Box> { let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; let path: Path = "/foo/bar".try_into().unwrap(); - ds1.set_user_attributes( - path.clone(), - Some(UserAttributes::try_new(br#"{"foo":"bar"}"#).unwrap()), - ) - .await?; + ds1.update_group(&path, Bytes::new()).await?; ds1.commit("update user 
attributes", None).await?; let node = ds2.get_node(&path).await.unwrap(); @@ -3332,20 +3268,10 @@ mod tests { let mut ds = repo.writable_session("main").await?; - ds.add_group("/".try_into().unwrap()).await?; - let zarr_meta = ZarrArrayMetadata { - shape: vec![5], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![NonZeroU64::new(1).unwrap()]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![], - storage_transformers: None, - dimension_names: None, - }; + ds.add_group("/".try_into().unwrap(), user_data()).await?; let new_array_path: Path = "/array".try_into().unwrap(); - ds.add_array(new_array_path.clone(), zarr_meta.clone()).await?; + ds.add_array(new_array_path.clone(), basic_shape(), None, user_data()).await?; ds.commit("create array", None).await?; // one writer sets chunks @@ -3404,20 +3330,9 @@ mod tests { let mut ds = repo.writable_session("main").await?; - ds.add_group("/".try_into().unwrap()).await?; - let zarr_meta = ZarrArrayMetadata { - shape: vec![5], - data_type: DataType::Int32, - chunk_shape: ChunkShape(vec![NonZeroU64::new(1).unwrap()]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int32(0), - codecs: vec![], - storage_transformers: None, - dimension_names: None, - }; - + ds.add_group("/".try_into().unwrap(), user_data()).await?; let new_array_path: Path = "/array".try_into().unwrap(); - ds.add_array(new_array_path.clone(), zarr_meta.clone()).await?; + ds.add_array(new_array_path.clone(), basic_shape(), None, user_data()).await?; let _array_created_snap = ds.commit("create array", None).await?; let mut ds1 = repo.writable_session("main").await?; @@ -3437,7 +3352,7 @@ mod tests { .await?; let new_array_2_path: Path = "/array_2".try_into().unwrap(); - ds1.add_array(new_array_2_path.clone(), zarr_meta.clone()).await?; + ds1.add_array(new_array_2_path.clone(), basic_shape(), None, user_data()).await?; ds1.set_chunk_ref( new_array_2_path.clone(), ChunkIndices(vec![0]), @@ -3504,217 +3419,126 @@ mod tests { false, ) .await?; - - // TODO: We can't create writable sessions from arbitrary snapshots anymore so not sure what to do about this? - // let's try to create a new commit, that conflicts with the previous one and writes - // to the same chunk, recovering with "Fail" policy (so it shouldn't recover) - // let mut repo2 = - // Repository::update(Arc::clone(&storage), array_created_snap.clone()).build(); - // repo2 - // .set_chunk_ref( - // new_array_path.clone(), - // ChunkIndices(vec![1]), - // Some(ChunkPayload::Inline("overridden".into())), - // ) - // .await?; - - // if let Err(SessionError::Conflict { .. }) = - // repo2.commit("main", "write one chunk with repo2", None).await - // { - // let solver = BasicConflictSolver { - // on_chunk_conflict: VersionSelection::Fail, - // ..BasicConflictSolver::default() - // }; - - // let res = repo2.rebase(&solver, "main").await; - // assert!(matches!( - // res, - // Err(SessionError::RebaseFailed { snapshot, conflicts, }) - // if snapshot == conflicting_snap && - // conflicts.len() == 1 && - // matches!(conflicts[0], Conflict::ChunkDoubleUpdate { ref path, ref chunk_coordinates, .. 
} - // if path == &new_array_path && chunk_coordinates == &[ChunkIndices(vec![1])].into()) - // )); - // } else { - // panic!("Bad test, it should conflict") - // } - - // // reset the branch to what repo1 wrote - // let current_snap = fetch_branch_tip(storage.as_ref(), "main").await?.snapshot; - // update_branch( - // storage.as_ref(), - // "main", - // conflicting_snap.clone(), - // Some(¤t_snap), - // false, - // ) - // .await?; - - // // let's try to create a new commit, that conflicts with the previous one and writes - // // to the same chunk, recovering with "UseOurs" policy - // let mut repo2 = - // Repository::update(Arc::clone(&storage), array_created_snap.clone()).build(); - // repo2 - // .set_chunk_ref( - // new_array_path.clone(), - // ChunkIndices(vec![1]), - // Some(ChunkPayload::Inline("overridden".into())), - // ) - // .await?; - // if let Err(SessionError::Conflict { .. }) = - // repo2.commit("main", "write one chunk with repo2", None).await - // { - // let solver = BasicConflictSolver { - // on_chunk_conflict: VersionSelection::UseOurs, - // ..Default::default() - // }; - - // repo2.rebase(&solver, "main").await?; - // repo2.commit("main", "after conflict", None).await?; - // let data = - // repo2.get_chunk_ref(&new_array_path, &ChunkIndices(vec![1])).await?; - // assert_eq!(data, Some(ChunkPayload::Inline("overridden".into()))); - // let commits = repo2.ancestry().await?.try_collect::>().await?; - // assert_eq!(commits[0].message, "after conflict"); - // assert_eq!(commits[1].message, "write two chunks with repo 1"); - // } else { - // panic!("Bad test, it should conflict") - // } - - // // reset the branch to what repo1 wrote - // let current_snap = fetch_branch_tip(storage.as_ref(), "main").await?.snapshot; - // update_branch( - // storage.as_ref(), - // "main", - // conflicting_snap.clone(), - // Some(¤t_snap), - // false, - // ) - // .await?; - - // // let's try to create a new commit, that conflicts with the previous one and writes - // // to the same chunk, recovering with "UseTheirs" policy - // let mut repo2 = - // Repository::update(Arc::clone(&storage), array_created_snap.clone()).build(); - // repo2 - // .set_chunk_ref( - // new_array_path.clone(), - // ChunkIndices(vec![1]), - // Some(ChunkPayload::Inline("overridden".into())), - // ) - // .await?; - // if let Err(SessionError::Conflict { .. 
}) = - // repo2.commit("main", "write one chunk with repo2", None).await - // { - // let solver = BasicConflictSolver { - // on_chunk_conflict: VersionSelection::UseTheirs, - // ..Default::default() - // }; - - // repo2.rebase(&solver, "main").await?; - // repo2.commit("main", "after conflict", None).await?; - // let data = - // repo2.get_chunk_ref(&new_array_path, &ChunkIndices(vec![1])).await?; - // assert_eq!(data, Some(ChunkPayload::Inline("hello1".into()))); - // let commits = repo2.ancestry().await?.try_collect::>().await?; - // assert_eq!(commits[0].message, "after conflict"); - // assert_eq!(commits[1].message, "write two chunks with repo 1"); - // } else { - // panic!("Bad test, it should conflict") - // } - - Ok(()) - } - - #[tokio::test] - /// Test conflict resolution with rebase - /// - /// Two sessions write user attributes to the same array - /// We attempt to recover using [`VersionSelection::UseOurs`] policy - async fn test_conflict_resolution_double_user_atts_edit_with_ours( - ) -> Result<(), Box> { - let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; - - let path: Path = "/foo/bar/some-array".try_into().unwrap(); - ds1.set_user_attributes( - path.clone(), - Some(UserAttributes::try_new(br#"{"repo":1}"#).unwrap()), - ) - .await?; - ds1.commit("update array", None).await?; - - ds2.set_user_attributes( - path.clone(), - Some(UserAttributes::try_new(br#"{"repo":2}"#).unwrap()), - ) - .await?; - ds2.commit("update array user atts", None).await.unwrap_err(); - - let solver = BasicConflictSolver { - on_user_attributes_conflict: VersionSelection::UseOurs, - ..Default::default() - }; - - ds2.rebase(&solver).await?; - ds2.commit("after conflict", None).await?; - - let atts = ds2.get_node(&path).await.unwrap().user_attributes.unwrap(); - assert_eq!( - atts, - UserAttributesSnapshot::Inline( - UserAttributes::try_new(br#"{"repo":2}"#).unwrap() - ) - ); Ok(()) } - #[tokio::test] - /// Test conflict resolution with rebase - /// - /// Two sessions write user attributes to the same array - /// We attempt to recover using [`VersionSelection::UseTheirs`] policy - async fn test_conflict_resolution_double_user_atts_edit_with_theirs( - ) -> Result<(), Box> { - let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; - - let path: Path = "/foo/bar/some-array".try_into().unwrap(); - ds1.set_user_attributes( - path.clone(), - Some(UserAttributes::try_new(br#"{"repo":1}"#).unwrap()), - ) - .await?; - ds1.commit("update array", None).await?; - - // we made one extra random change to the repo, because we'll undo the user attributes - // update and we cannot commit an empty change - ds2.add_group("/baz".try_into().unwrap()).await?; - - ds2.set_user_attributes( - path.clone(), - Some(UserAttributes::try_new(br#"{"repo":2}"#).unwrap()), - ) - .await?; - ds2.commit("update array user atts", None).await.unwrap_err(); - - let solver = BasicConflictSolver { - on_user_attributes_conflict: VersionSelection::UseTheirs, - ..Default::default() - }; - - ds2.rebase(&solver).await?; - ds2.commit("after conflict", None).await?; - - let atts = ds2.get_node(&path).await.unwrap().user_attributes.unwrap(); - assert_eq!( - atts, - UserAttributesSnapshot::Inline( - UserAttributes::try_new(br#"{"repo":1}"#).unwrap() - ) - ); - - ds2.get_node(&"/baz".try_into().unwrap()).await?; - Ok(()) - } + // TODO: We can't create writable sessions from arbitrary snapshots anymore so not sure what to do about this? 
+ // let's try to create a new commit, that conflicts with the previous one and writes + // to the same chunk, recovering with "Fail" policy (so it shouldn't recover) + // let mut repo2 = + // Repository::update(Arc::clone(&storage), array_created_snap.clone()).build(); + // repo2 + // .set_chunk_ref( + // new_array_path.clone(), + // ChunkIndices(vec![1]), + // Some(ChunkPayload::Inline("overridden".into())), + // ) + // .await?; + + // if let Err(SessionError::Conflict { .. }) = + // repo2.commit("main", "write one chunk with repo2", None).await + // { + // let solver = BasicConflictSolver { + // on_chunk_conflict: VersionSelection::Fail, + // ..BasicConflictSolver::default() + // }; + + // let res = repo2.rebase(&solver, "main").await; + // assert!(matches!( + // res, + // Err(SessionError::RebaseFailed { snapshot, conflicts, }) + // if snapshot == conflicting_snap && + // conflicts.len() == 1 && + // matches!(conflicts[0], Conflict::ChunkDoubleUpdate { ref path, ref chunk_coordinates, .. } + // if path == &new_array_path && chunk_coordinates == &[ChunkIndices(vec![1])].into()) + // )); + // } else { + // panic!("Bad test, it should conflict") + // } + + // // reset the branch to what repo1 wrote + // let current_snap = fetch_branch_tip(storage.as_ref(), "main").await?.snapshot; + // update_branch( + // storage.as_ref(), + // "main", + // conflicting_snap.clone(), + // Some(¤t_snap), + // false, + // ) + // .await?; + + // // let's try to create a new commit, that conflicts with the previous one and writes + // // to the same chunk, recovering with "UseOurs" policy + // let mut repo2 = + // Repository::update(Arc::clone(&storage), array_created_snap.clone()).build(); + // repo2 + // .set_chunk_ref( + // new_array_path.clone(), + // ChunkIndices(vec![1]), + // Some(ChunkPayload::Inline("overridden".into())), + // ) + // .await?; + // if let Err(SessionError::Conflict { .. }) = + // repo2.commit("main", "write one chunk with repo2", None).await + // { + // let solver = BasicConflictSolver { + // on_chunk_conflict: VersionSelection::UseOurs, + // ..Default::default() + // }; + + // repo2.rebase(&solver, "main").await?; + // repo2.commit("main", "after conflict", None).await?; + // let data = + // repo2.get_chunk_ref(&new_array_path, &ChunkIndices(vec![1])).await?; + // assert_eq!(data, Some(ChunkPayload::Inline("overridden".into()))); + // let commits = repo2.ancestry().await?.try_collect::>().await?; + // assert_eq!(commits[0].message, "after conflict"); + // assert_eq!(commits[1].message, "write two chunks with repo 1"); + // } else { + // panic!("Bad test, it should conflict") + // } + + // // reset the branch to what repo1 wrote + // let current_snap = fetch_branch_tip(storage.as_ref(), "main").await?.snapshot; + // update_branch( + // storage.as_ref(), + // "main", + // conflicting_snap.clone(), + // Some(¤t_snap), + // false, + // ) + // .await?; + + // // let's try to create a new commit, that conflicts with the previous one and writes + // // to the same chunk, recovering with "UseTheirs" policy + // let mut repo2 = + // Repository::update(Arc::clone(&storage), array_created_snap.clone()).build(); + // repo2 + // .set_chunk_ref( + // new_array_path.clone(), + // ChunkIndices(vec![1]), + // Some(ChunkPayload::Inline("overridden".into())), + // ) + // .await?; + // if let Err(SessionError::Conflict { .. 
}) = + // repo2.commit("main", "write one chunk with repo2", None).await + // { + // let solver = BasicConflictSolver { + // on_chunk_conflict: VersionSelection::UseTheirs, + // ..Default::default() + // }; + + // repo2.rebase(&solver, "main").await?; + // repo2.commit("main", "after conflict", None).await?; + // let data = + // repo2.get_chunk_ref(&new_array_path, &ChunkIndices(vec![1])).await?; + // assert_eq!(data, Some(ChunkPayload::Inline("hello1".into()))); + // let commits = repo2.ancestry().await?.try_collect::>().await?; + // assert_eq!(commits[0].message, "after conflict"); + // assert_eq!(commits[1].message, "write two chunks with repo 1"); + // } else { + // panic!("Bad test, it should conflict") + // } #[tokio::test] /// Test conflict resolution with rebase @@ -3727,7 +3551,7 @@ mod tests { let (mut ds1, mut ds2) = get_sessions_for_conflict().await?; let path: Path = "/foo/bar/some-array".try_into().unwrap(); - ds1.update_array(path.clone(), basic_meta()).await?; + ds1.update_array(&path, basic_shape(), None, user_data()).await?; ds1.commit("update array", None).await?; ds2.delete_array(path.clone()).await?; @@ -3806,12 +3630,13 @@ mod tests { let mut ds2 = repo.writable_session("main").await?; let path: Path = "/foo/bar/some-array".try_into().unwrap(); - ds1.set_user_attributes( + ds1.set_chunk_ref( path.clone(), - Some(UserAttributes::try_new(br#"{"repo":1}"#).unwrap()), + ChunkIndices(vec![1]), + Some(ChunkPayload::Inline("repo 1".into())), ) .await?; - let non_conflicting_snap = ds1.commit("update user atts", None).await?; + let non_conflicting_snap = ds1.commit("updated non-conflict chunk", None).await?; let mut ds1 = repo.writable_session("main").await?; ds1.set_chunk_ref( @@ -3860,6 +3685,7 @@ mod tests { mod state_machine_test { use crate::format::snapshot::NodeData; use crate::format::Path; + use bytes::Bytes; use futures::Future; use proptest::prelude::*; use proptest::sample; @@ -3874,16 +3700,17 @@ mod tests { use proptest::test_runner::Config; use super::create_memory_store_repository; + use super::ArrayShape; + use super::DimensionName; use super::Session; - use super::ZarrArrayMetadata; - use super::{node_paths, zarr_array_metadata}; + use super::{node_paths, shapes_and_dims}; #[derive(Clone, Debug)] enum RepositoryTransition { - AddArray(Path, ZarrArrayMetadata), - UpdateArray(Path, ZarrArrayMetadata), + AddArray(Path, ArrayShape, Option>, Bytes), + UpdateArray(Path, ArrayShape, Option>, Bytes), DeleteArray(Option), - AddGroup(Path), + AddGroup(Path, Bytes), DeleteGroup(Option), } @@ -3892,8 +3719,8 @@ mod tests { #[derive(Clone, Default, Debug)] struct RepositoryModel { - arrays: HashMap, - groups: Vec, + arrays: HashMap>, Bytes)>, + groups: HashMap, } impl ReferenceStateMachine for RepositoryStateMachine { @@ -3924,7 +3751,7 @@ mod tests { let delete_groups = { if !state.groups.is_empty() { - sample::select(state.groups.clone()) + sample::select(state.groups.keys().cloned().collect::>()) .prop_map(|p| RepositoryTransition::DeleteGroup(Some(p))) .boxed() } else { @@ -3933,12 +3760,38 @@ mod tests { }; prop_oneof![ - (node_paths(), zarr_array_metadata()) - .prop_map(|(a, b)| RepositoryTransition::AddArray(a, b)), - (node_paths(), zarr_array_metadata()) - .prop_map(|(a, b)| RepositoryTransition::UpdateArray(a, b)), + ( + node_paths(), + shapes_and_dims(None), + proptest::collection::vec(any::(), 0..=100) + ) + .prop_map(|(a, shape, user_data)| { + RepositoryTransition::AddArray( + a, + shape.shape, + shape.dimension_names, + 
Bytes::copy_from_slice(user_data.as_slice()), + ) + }), + ( + node_paths(), + shapes_and_dims(None), + proptest::collection::vec(any::(), 0..=100) + ) + .prop_map(|(a, shape, user_data)| { + RepositoryTransition::UpdateArray( + a, + shape.shape, + shape.dimension_names, + Bytes::copy_from_slice(user_data.as_slice()), + ) + }), delete_arrays, - node_paths().prop_map(RepositoryTransition::AddGroup), + (node_paths(), proptest::collection::vec(any::(), 0..=100)) + .prop_map(|(p, ud)| RepositoryTransition::AddGroup( + p, + Bytes::copy_from_slice(ud.as_slice()) + )), delete_groups, ] .boxed() @@ -3950,14 +3803,20 @@ mod tests { ) -> Self::State { match transition { // Array ops - RepositoryTransition::AddArray(path, metadata) => { - let res = state.arrays.insert(path.clone(), metadata.clone()); + RepositoryTransition::AddArray(path, shape, dims, ud) => { + let res = state.arrays.insert( + path.clone(), + (shape.clone(), dims.clone(), ud.clone()), + ); assert!(res.is_none()); } - RepositoryTransition::UpdateArray(path, metadata) => { + RepositoryTransition::UpdateArray(path, shape, dims, ud) => { state .arrays - .insert(path.clone(), metadata.clone()) + .insert( + path.clone(), + (shape.clone(), dims.clone(), ud.clone()), + ) .expect("(postcondition) insertion failed"); } RepositoryTransition::DeleteArray(path) => { @@ -3969,17 +3828,13 @@ mod tests { } // Group ops - RepositoryTransition::AddGroup(path) => { - state.groups.push(path.clone()); + RepositoryTransition::AddGroup(path, ud) => { + state.groups.insert(path.clone(), ud.clone()); // TODO: postcondition } RepositoryTransition::DeleteGroup(Some(path)) => { - let index = - state.groups.iter().position(|x| x == path).expect( - "Attempting to delete a non-existent path: {path}", - ); - state.groups.swap_remove(index); - state.groups.retain(|group| !group.starts_with(path)); + state.groups.remove(path); + state.groups.retain(|group, _| !group.starts_with(path)); state.arrays.retain(|array, _| !array.starts_with(path)); } _ => panic!(), @@ -3989,15 +3844,17 @@ mod tests { fn preconditions(state: &Self::State, transition: &Self::Transition) -> bool { match transition { - RepositoryTransition::AddArray(path, _) => { - !state.arrays.contains_key(path) && !state.groups.contains(path) + RepositoryTransition::AddArray(path, _, _, _) => { + !state.arrays.contains_key(path) + && !state.groups.contains_key(path) } - RepositoryTransition::UpdateArray(path, _) => { + RepositoryTransition::UpdateArray(path, _, _, _) => { state.arrays.contains_key(path) } RepositoryTransition::DeleteArray(path) => path.is_some(), - RepositoryTransition::AddGroup(path) => { - !state.arrays.contains_key(path) && !state.groups.contains(path) + RepositoryTransition::AddGroup(path, _) => { + !state.arrays.contains_key(path) + && !state.groups.contains_key(path) } RepositoryTransition::DeleteGroup(p) => p.is_some(), } @@ -4046,17 +3903,17 @@ mod tests { let runtime = &state.runtime; let repository = &mut state.session; match transition { - RepositoryTransition::AddArray(path, metadata) => { - runtime.unwrap(repository.add_array(path, metadata)) + RepositoryTransition::AddArray(path, shape, dims, ud) => { + runtime.unwrap(repository.add_array(path, shape, dims, ud)) } - RepositoryTransition::UpdateArray(path, metadata) => { - runtime.unwrap(repository.update_array(path, metadata)) + RepositoryTransition::UpdateArray(path, shape, dims, ud) => { + runtime.unwrap(repository.update_array(&path, shape, dims, ud)) } RepositoryTransition::DeleteArray(Some(path)) => { 
runtime.unwrap(repository.delete_array(path)) } - RepositoryTransition::AddGroup(path) => { - runtime.unwrap(repository.add_group(path)) + RepositoryTransition::AddGroup(path, ud) => { + runtime.unwrap(repository.add_group(path, ud)) } RepositoryTransition::DeleteGroup(Some(path)) => { runtime.unwrap(repository.delete_group(path)) @@ -4071,23 +3928,28 @@ mod tests { ref_state: &::State, ) { let runtime = &state.runtime; - for (path, metadata) in ref_state.arrays.iter() { + for (path, (shape, dims, ud)) in ref_state.arrays.iter() { let node = runtime.unwrap(state.session.get_array(path)); let actual_metadata = match node.node_data { - NodeData::Array(metadata, _) => Ok(metadata), + NodeData::Array { shape, dimension_names, .. } => { + Ok((shape, dimension_names)) + } _ => Err("foo"), } .unwrap(); - assert_eq!(metadata, &actual_metadata); + assert_eq!(shape, &actual_metadata.0); + assert_eq!(dims, &actual_metadata.1); + assert_eq!(ud, &node.user_data); } - for path in ref_state.groups.iter() { + for (path, ud) in ref_state.groups.iter() { let node = runtime.unwrap(state.session.get_group(path)); match node.node_data { NodeData::Group => Ok(()), _ => Err("foo"), } .unwrap(); + assert_eq!(&node.user_data, ud) } } } diff --git a/icechunk/src/store.rs b/icechunk/src/store.rs index 12ec9134..936cb3b1 100644 --- a/icechunk/src/store.rs +++ b/icechunk/src/store.rs @@ -2,7 +2,6 @@ use std::{ collections::HashSet, fmt::Display, iter, - num::NonZeroU64, ops::{Deref, DerefMut}, sync::{ atomic::{AtomicUsize, Ordering}, @@ -24,12 +23,8 @@ use crate::{ error::ICError, format::{ manifest::{ChunkPayload, VirtualChunkRef}, - snapshot::{NodeData, NodeSnapshot, UserAttributesSnapshot, ZarrArrayMetadata}, - ByteRange, ChunkIndices, ChunkOffset, IcechunkFormatError, Path, PathError, - }, - metadata::{ - ArrayShape, ChunkKeyEncoding, ChunkShape, Codec, DataType, DimensionNames, - FillValue, StorageTransformer, UserAttributes, + snapshot::{ArrayShape, DimensionName, NodeData, NodeSnapshot}, + ByteRange, ChunkIndices, ChunkOffset, Path, PathError, }, refs::{RefError, RefErrorKind}, repository::{RepositoryError, RepositoryErrorKind}, @@ -101,6 +96,8 @@ pub enum StoreErrorKind { "invalid chunk location, no matching virtual chunk container: `{chunk_location}`" )] InvalidVirtualChunkContainer { chunk_location: String }, + #[error("{0}")] + Other(String), #[error("unknown store error")] Unknown(Box), } @@ -299,12 +296,12 @@ impl Store { match Key::parse(key)? { Key::Metadata { node_path } => { if let Ok(array_meta) = serde_json::from_slice(value.as_ref()) { - self.set_array_meta(node_path, array_meta, locked_session).await + self.set_array_meta(node_path, value, array_meta, locked_session) + .await } else { - match serde_json::from_slice(value.as_ref()) { - Ok(group_meta) => { - self.set_group_meta(node_path, group_meta, locked_session) - .await + match serde_json::from_slice::(value.as_ref()) { + Ok(_) => { + self.set_group_meta(node_path, value, locked_session).await } Err(err) => Err(StoreErrorKind::BadMetadata(err).into()), } @@ -414,7 +411,7 @@ impl Store { let node = guard.get_closest_ancestor_node(&path).await; if let Ok(NodeSnapshot { path: node_path, - node_data: NodeData::Array(..), + node_data: NodeData::Array { .. }, .. }) = node { @@ -540,7 +537,7 @@ impl Store { let path = Path::try_from(absolute_prefix)?; let session = Arc::clone(&self.session).read_owned().await; let results = match session.get_node(&path).await { - Ok(NodeSnapshot { node_data: NodeData::Array(..), .. 
}) => { + Ok(NodeSnapshot { node_data: NodeData::Array { .. }, .. }) => { // if this is an array we know what to yield vec![ ListDirItem::Key("zarr.json".to_string()), @@ -551,7 +548,7 @@ impl Store { ListDirItem::Prefix("c".to_string()), ] } - Ok(NodeSnapshot { node_data: NodeData::Group, .. }) => { + Ok(NodeSnapshot { node_data: NodeData::Group { .. }, .. }) => { // if the prefix is the path to a group we need to discover any nodes with the prefix as node path // listing chunks is unnecessary self.list_metadata_prefix(prefix, true) @@ -647,47 +644,49 @@ impl Store { async fn set_array_meta( &self, path: Path, + user_data: Bytes, array_meta: ArrayMetadata, - locked_repo: Option<&mut Session>, + locked_session: Option<&mut Session>, ) -> Result<(), StoreError> { - match locked_repo { - Some(repo) => set_array_meta(path, array_meta, repo).await, - None => self.set_array_meta_locking(path, array_meta).await, + match locked_session { + Some(session) => set_array_meta(path, user_data, array_meta, session).await, + None => self.set_array_meta_locking(path, user_data, array_meta).await, } } async fn set_array_meta_locking( &self, path: Path, + user_data: Bytes, array_meta: ArrayMetadata, ) -> Result<(), StoreError> { // we need to hold the lock while we search the array and do the update to avoid race // conditions with other writers (notice we don't take &mut self) let mut guard = self.session.write().await; - set_array_meta(path, array_meta, guard.deref_mut()).await + set_array_meta(path, user_data, array_meta, guard.deref_mut()).await } async fn set_group_meta( &self, path: Path, - group_meta: GroupMetadata, + user_data: Bytes, locked_repo: Option<&mut Session>, ) -> Result<(), StoreError> { match locked_repo { - Some(repo) => set_group_meta(path, group_meta, repo).await, - None => self.set_group_meta_locking(path, group_meta).await, + Some(repo) => set_group_meta(path, user_data, repo).await, + None => self.set_group_meta_locking(path, user_data).await, } } async fn set_group_meta_locking( &self, path: Path, - group_meta: GroupMetadata, + user_data: Bytes, ) -> Result<(), StoreError> { // we need to hold the lock while we search the array and do the update to avoid race // conditions with other writers (notice we don't take &mut self) let mut guard = self.session.write().await; - set_group_meta(path, group_meta, guard.deref_mut()).await + set_group_meta(path, user_data, guard.deref_mut()).await } async fn list_metadata_prefix<'a, 'b: 'a>( @@ -741,60 +740,45 @@ impl Store { async fn set_array_meta( path: Path, + user_data: Bytes, array_meta: ArrayMetadata, - repo: &mut Session, + session: &mut Session, ) -> StoreResult<()> { - // TODO: Consider deleting all this logic? - // We try hard to not overwrite existing metadata here. - // I don't think this is actually useful because Zarr's - // Array/Group API will require that the user set `overwrite=True` - // which will delete any existing array metadata. This path is only - // applicable when using the explicit `store.set` interface. 
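// ---------------------------------------------------------------------------
// Editor's sketch (not part of this patch): what the rewritten metadata path is
// aiming for. A zarr.json document written through the store is parsed only far
// enough to build the `ArrayShape` (pairing "shape" with the regular chunk grid)
// and the dimension names; the document itself is stored verbatim as the node's
// user data, and `get_metadata` later just slices those bytes back out.
// This assumes the imports already in scope in this module; the
// `store.get(key, &ByteRange)` accessor, the key and the JSON below are
// assumptions made for illustration, not fixtures from this patch.
async fn demo_metadata_roundtrip(store: &Store) -> Result<(), Box<dyn std::error::Error>> {
    let array_json = Bytes::copy_from_slice(
        br#"{"zarr_format":3,"node_type":"array","shape":[4,4],"data_type":"int32",
            "chunk_grid":{"name":"regular","configuration":{"chunk_shape":[2,2]}},
            "chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},
            "fill_value":0,"codecs":[]}"#,
    );

    // Creates the array node (or updates it if the bytes changed), keeping the
    // document exactly as written.
    store.set("demo/zarr.json", array_json.clone()).await?;

    // Reading the key returns the stored bytes unmodified; there is no longer a
    // re-serialization from a structured ZarrArrayMetadata on the way out.
    let read_back = store.get("demo/zarr.json", &ByteRange::From(0)).await?;
    assert_eq!(read_back, array_json);
    Ok(())
}
// ---------------------------------------------------------------------------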
- if let Ok(node) = repo.get_array(&path).await { - // Check if the user attributes are different, if they are update them - let existing_attrs = match node.user_attributes { - None => None, - Some(UserAttributesSnapshot::Inline(atts)) => Some(atts), - // FIXME: implement - Some(UserAttributesSnapshot::Ref(_)) => None, - }; - - if existing_attrs != array_meta.attributes { - repo.set_user_attributes(path.clone(), array_meta.attributes).await?; - } - - // Check if the zarr metadata is different, if it is update it - if let NodeData::Array(existing_array_metadata, _) = node.node_data { - if existing_array_metadata != array_meta.zarr_metadata { - repo.update_array(path, array_meta.zarr_metadata).await?; + let shape = array_meta + .shape() + .ok_or(StoreErrorKind::Other("Invalid chunk grid metadata".to_string()))?; + if let Ok(node) = session.get_array(&path).await { + if let NodeData::Array { .. } = node.node_data { + if node.user_data != user_data { + session + .update_array(&path, shape, array_meta.dimension_names(), user_data) + .await?; } - } else { - // This should be unreachable, but just in case... - repo.update_array(path, array_meta.zarr_metadata).await?; } - + // FIXME: don't ignore error Ok(()) } else { - repo.add_array(path.clone(), array_meta.zarr_metadata).await?; - repo.set_user_attributes(path, array_meta.attributes).await?; + session + .add_array(path.clone(), shape, array_meta.dimension_names(), user_data) + .await?; Ok(()) } } async fn set_group_meta( path: Path, - group_meta: GroupMetadata, - repo: &mut Session, + user_data: Bytes, + session: &mut Session, ) -> StoreResult<()> { - // we need to hold the lock while we search the group and do the update to avoid race - // conditions with other writers (notice we don't take &mut self) - // - if repo.get_group(&path).await.is_ok() { - repo.set_user_attributes(path, group_meta.attributes).await?; + if let Ok(node) = session.get_group(&path).await { + if let NodeData::Group = node.node_data { + if node.user_data != user_data { + session.update_group(&path, user_data).await?; + } + } Ok(()) } else { - repo.add_group(path.clone()).await?; - repo.set_user_attributes(path, group_meta.attributes).await?; + session.add_group(path.clone(), user_data).await?; Ok(()) } } @@ -803,29 +787,13 @@ async fn get_metadata( _key: &str, path: &Path, range: &ByteRange, - repo: &Session, + session: &Session, ) -> StoreResult { // FIXME: don't skip errors - let node = repo.get_node(path).await.map_err(|_| { + let node = session.get_node(path).await.map_err(|_| { StoreErrorKind::NotFound(KeyNotFoundError::NodeNotFound { path: path.clone() }) })?; - let user_attributes = match node.user_attributes { - None => None, - Some(UserAttributesSnapshot::Inline(atts)) => Some(atts), - // FIXME: implement - #[allow(clippy::unimplemented)] - Some(UserAttributesSnapshot::Ref(_)) => unimplemented!(), - }; - let full_metadata = match node.node_data { - NodeData::Group => { - Ok::(GroupMetadata::new(user_attributes).to_bytes()) - } - NodeData::Array(zarr_metadata, _) => { - Ok(ArrayMetadata::new(user_attributes, zarr_metadata).to_bytes()) - } - }?; - - Ok(range.slice(full_metadata)) + Ok(range.slice(node.user_data)) } async fn get_chunk_bytes( @@ -833,9 +801,9 @@ async fn get_chunk_bytes( path: Path, coords: ChunkIndices, byte_range: &ByteRange, - repo: &Session, + session: &Session, ) -> StoreResult { - let reader = repo.get_chunk_reader(&path, &coords, byte_range).await?; + let reader = session.get_chunk_reader(&path, &coords, byte_range).await?; // then we can 
fetch the bytes without holding the lock let chunk = get_chunk(reader).await?; @@ -849,8 +817,12 @@ async fn get_chunk_bytes( ) } -async fn get_metadata_size(key: &str, path: &Path, repo: &Session) -> StoreResult { - let bytes = get_metadata(key, path, &ByteRange::From(0), repo).await?; +async fn get_metadata_size( + key: &str, + path: &Path, + session: &Session, +) -> StoreResult { + let bytes = get_metadata(key, path, &ByteRange::From(0), session).await?; Ok(bytes.len() as u64) } @@ -858,9 +830,9 @@ async fn get_chunk_size( _key: &str, path: &Path, coords: &ChunkIndices, - repo: &Session, + session: &Session, ) -> StoreResult { - let chunk_ref = repo.get_chunk_ref(path, coords).await?; + let chunk_ref = session.get_chunk_ref(path, coords).await?; let size = chunk_ref .map(|payload| match payload { ChunkPayload::Inline(bytes) => bytes.len() as u64, @@ -874,14 +846,14 @@ async fn get_chunk_size( async fn get_key( key: &str, byte_range: &ByteRange, - repo: &Session, + session: &Session, ) -> StoreResult { let bytes = match Key::parse(key)? { Key::Metadata { node_path } => { - get_metadata(key, &node_path, byte_range, repo).await + get_metadata(key, &node_path, byte_range, session).await } Key::Chunk { node_path, coords } => { - get_chunk_bytes(key, node_path, coords, byte_range, repo).await + get_chunk_bytes(key, node_path, coords, byte_range, session).await } Key::ZarrV2(key) => { Err(StoreErrorKind::NotFound(KeyNotFoundError::ZarrV2KeyNotFound { key }) @@ -1037,153 +1009,39 @@ impl Display for Key { #[serde_as] #[derive(Debug, Serialize, Deserialize, PartialEq)] struct ArrayMetadata { - zarr_format: u8, + pub shape: Vec, + #[serde(deserialize_with = "validate_array_node_type")] node_type: String, - #[serde(skip_serializing_if = "Option::is_none")] - attributes: Option, - #[serde(flatten)] - #[serde_as(as = "TryFromInto")] - zarr_metadata: ZarrArrayMetadata, -} - -#[serde_as] -#[derive(Serialize, Deserialize)] -pub struct ZarrArrayMetadataSerialzer { - pub shape: ArrayShape, - pub data_type: DataType, #[serde_as(as = "TryFromInto")] - #[serde(rename = "chunk_grid")] - pub chunk_shape: ChunkShape, + pub chunk_grid: Vec, - #[serde_as(as = "TryFromInto")] - pub chunk_key_encoding: ChunkKeyEncoding, - pub fill_value: serde_json::Value, - pub codecs: Vec, - #[serde(skip_serializing_if = "Option::is_none")] - pub storage_transformers: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - // each dimension name can be null in Zarr - pub dimension_names: Option, + pub dimension_names: Option>>, } -impl TryFrom for ZarrArrayMetadata { - type Error = IcechunkFormatError; - - fn try_from(value: ZarrArrayMetadataSerialzer) -> Result { - let ZarrArrayMetadataSerialzer { - shape, - data_type, - chunk_shape, - chunk_key_encoding, - fill_value, - codecs, - storage_transformers, - dimension_names, - } = value; - { - let fill_value = FillValue::from_data_type_and_json(&data_type, &fill_value)?; - Ok(ZarrArrayMetadata { - fill_value, - shape, - data_type, - chunk_shape, - chunk_key_encoding, - codecs, - storage_transformers, - dimension_names, - }) - } +impl ArrayMetadata { + fn dimension_names(&self) -> Option> { + self.dimension_names + .as_ref() + .map(|ds| ds.iter().map(|d| d.as_ref().map(|s| s.as_str()).into()).collect()) } -} - -impl From for ZarrArrayMetadataSerialzer { - fn from(value: ZarrArrayMetadata) -> Self { - let ZarrArrayMetadata { - shape, - data_type, - chunk_shape, - chunk_key_encoding, - fill_value, - codecs, - storage_transformers, - dimension_names, - } = value; - { - fn 
fill_value_to_json(f: FillValue) -> serde_json::Value { - match f { - FillValue::Bool(b) => b.into(), - FillValue::Int8(n) => n.into(), - FillValue::Int16(n) => n.into(), - FillValue::Int32(n) => n.into(), - FillValue::Int64(n) => n.into(), - FillValue::UInt8(n) => n.into(), - FillValue::UInt16(n) => n.into(), - FillValue::UInt32(n) => n.into(), - FillValue::UInt64(n) => n.into(), - FillValue::Float16(f) => { - if f.is_nan() { - FillValue::NAN_STR.into() - } else if f == f32::INFINITY { - FillValue::INF_STR.into() - } else if f == f32::NEG_INFINITY { - FillValue::NEG_INF_STR.into() - } else { - f.into() - } - } - FillValue::Float32(f) => { - if f.is_nan() { - FillValue::NAN_STR.into() - } else if f == f32::INFINITY { - FillValue::INF_STR.into() - } else if f == f32::NEG_INFINITY { - FillValue::NEG_INF_STR.into() - } else { - f.into() - } - } - FillValue::Float64(f) => { - if f.is_nan() { - FillValue::NAN_STR.into() - } else if f == f64::INFINITY { - FillValue::INF_STR.into() - } else if f == f64::NEG_INFINITY { - FillValue::NEG_INF_STR.into() - } else { - f.into() - } - } - FillValue::Complex64(r, i) => ([r, i].as_ref()).into(), - FillValue::Complex128(r, i) => ([r, i].as_ref()).into(), - FillValue::String(s) => s.into(), - FillValue::Bytes(b) => b.into(), - } - } - let fill_value = fill_value_to_json(fill_value); - ZarrArrayMetadataSerialzer { - shape, - data_type, - chunk_shape, - chunk_key_encoding, - codecs, - storage_transformers, - dimension_names, - fill_value, - } + fn shape(&self) -> Option { + if self.shape.len() != self.chunk_grid.len() { + None + } else { + ArrayShape::new( + self.shape.iter().zip(self.chunk_grid.iter()).map(|(a, b)| (*a, *b)), + ) } } } #[derive(Debug, Serialize, Deserialize)] struct GroupMetadata { - zarr_format: u8, #[serde(deserialize_with = "validate_group_node_type")] node_type: String, - #[serde(skip_serializing_if = "Option::is_none")] - attributes: Option, } fn validate_group_node_type<'de, D>(d: D) -> Result @@ -1218,49 +1076,18 @@ where Ok(value) } -impl ArrayMetadata { - fn new(attributes: Option, zarr_metadata: ZarrArrayMetadata) -> Self { - Self { zarr_format: 3, node_type: "array".to_string(), attributes, zarr_metadata } - } - - fn to_bytes(&self) -> Bytes { - Bytes::from_iter( - // We can unpack because it comes from controlled datastructures that can be serialized - #[allow(clippy::expect_used)] - serde_json::to_vec(self).expect("bug in ArrayMetadata serialization"), - ) - } -} - -impl GroupMetadata { - fn new(attributes: Option) -> Self { - Self { zarr_format: 3, node_type: "group".to_string(), attributes } - } - - fn to_bytes(&self) -> Bytes { - Bytes::from_iter( - // We can unpack because it comes from controlled datastructures that can be serialized - #[allow(clippy::expect_used)] - serde_json::to_vec(self).expect("bug in GroupMetadata serialization"), - ) - } -} - #[derive(Serialize, Deserialize)] struct NameConfigSerializer { name: String, configuration: serde_json::Value, } -impl From for NameConfigSerializer { - fn from(value: ChunkShape) -> Self { +impl From> for NameConfigSerializer { + fn from(value: Vec) -> Self { let arr = serde_json::Value::Array( value - .0 .iter() - .map(|v| { - serde_json::Value::Number(serde_json::value::Number::from(v.get())) - }) + .map(|v| serde_json::Value::Number(serde_json::value::Number::from(*v))) .collect(), ); let kvs = serde_json::value::Map::from_iter(iter::once(( @@ -1274,7 +1101,7 @@ impl From for NameConfigSerializer { } } -impl TryFrom for ChunkShape { +impl TryFrom for Vec { type Error = 
&'static str; fn try_from(value: NameConfigSerializer) -> Result { @@ -1289,52 +1116,16 @@ impl TryFrom for ChunkShape { .ok_or("cannot parse ChunkShape")?; let shape = values .iter() - .map(|v| v.as_u64().and_then(|u64| NonZeroU64::try_from(u64).ok())) + .map(|v| v.as_u64()) .collect::>>() .ok_or("cannot parse ChunkShape")?; - Ok(ChunkShape(shape)) + Ok(shape) } _ => Err("cannot parse ChunkShape"), } } } -impl From for NameConfigSerializer { - fn from(_value: ChunkKeyEncoding) -> Self { - let kvs = serde_json::value::Map::from_iter(iter::once(( - "separator".to_string(), - serde_json::Value::String("/".to_string()), - ))); - Self { - name: "default".to_string(), - configuration: serde_json::Value::Object(kvs), - } - } -} - -impl TryFrom for ChunkKeyEncoding { - type Error = &'static str; - - fn try_from(value: NameConfigSerializer) -> Result { - //FIXME: we are hardcoding / as the separator - match value { - NameConfigSerializer { - name, - configuration: serde_json::Value::Object(kvs), - } if name == "default" => { - if let Some("/") = - kvs.get("separator").ok_or("cannot parse ChunkKeyEncoding")?.as_str() - { - Ok(ChunkKeyEncoding::Slash) - } else { - Err("cannot parse ChunkKeyEncoding") - } - } - _ => Err("cannot parse ChunkKeyEncoding"), - } - } -} - #[cfg(test)] #[allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)] mod tests { @@ -1537,97 +1328,28 @@ mod tests { .is_err()); assert!(serde_json::from_str::( - r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"int32","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":0,"codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ) - .is_ok()); + r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"int32","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":0,"codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# + ) + .is_ok()); assert!(serde_json::from_str::( - r#"{"zarr_format":3,"node_type":"group","shape":[2,2,2],"data_type":"int32","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":0,"codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ) - .is_err()); + r#"{"zarr_format":3,"node_type":"group","shape":[2,2,2],"data_type":"int32","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":0,"codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# + ) + .is_err()); // deserialize with nan - assert!(matches!( - serde_json::from_str::( - 
r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float16","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"NaN","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ).unwrap().zarr_metadata.fill_value, - FillValue::Float16(n) if n.is_nan() - )); - assert!(matches!( - serde_json::from_str::( - r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float32","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"NaN","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ).unwrap().zarr_metadata.fill_value, - FillValue::Float32(n) if n.is_nan() - )); - assert!(matches!( - serde_json::from_str::( - r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float64","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"NaN","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ).unwrap().zarr_metadata.fill_value, - FillValue::Float64(n) if n.is_nan() - )); - - // deserialize with infinity - assert_eq!( - serde_json::from_str::( - r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float16","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"Infinity","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ).unwrap().zarr_metadata.fill_value, - FillValue::Float16(f32::INFINITY) - ); - assert_eq!( - serde_json::from_str::( - r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float32","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"Infinity","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ).unwrap().zarr_metadata.fill_value, - FillValue::Float32(f32::INFINITY) - ); - assert_eq!( - serde_json::from_str::( - r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float64","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"Infinity","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ).unwrap().zarr_metadata.fill_value, - FillValue::Float64(f64::INFINITY) - ); - - // deserialize with -infinity assert_eq!( - serde_json::from_str::( - 
r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float16","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"-Infinity","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ).unwrap().zarr_metadata.fill_value, - FillValue::Float16(f32::NEG_INFINITY) - ); - assert_eq!( - serde_json::from_str::( - r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float32","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"-Infinity","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ).unwrap().zarr_metadata.fill_value, - FillValue::Float32(f32::NEG_INFINITY) - ); - assert_eq!( - serde_json::from_str::( - r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float64","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"-Infinity","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# - ).unwrap().zarr_metadata.fill_value, - FillValue::Float64(f64::NEG_INFINITY) - ); - - // infinity roundtrip - let zarr_meta = ZarrArrayMetadata { - shape: vec![1, 1, 2], - data_type: DataType::Float16, - chunk_shape: ChunkShape(vec![NonZeroU64::new(2).unwrap()]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Float16(f32::NEG_INFINITY), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![Some("t".to_string())]), - }; - let zarr_meta = ArrayMetadata::new(None, zarr_meta); + serde_json::from_str::( + r#"{"zarr_format":3,"node_type":"array","shape":[2,2,2],"data_type":"float16","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"NaN","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# + ).unwrap().dimension_names(), + Some(vec!["x".into(), "y".into(), "t".into()]) + ); assert_eq!( - serde_json::from_str::( - serde_json::to_string(&zarr_meta).unwrap().as_str() - ) - .unwrap(), - zarr_meta, - ) + serde_json::from_str::( + r#"{"zarr_format":3,"node_type":"array","shape":[2,3,4],"data_type":"float16","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,2,3]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":"NaN","codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"# + ).unwrap().shape(), + ArrayShape::new(vec![(2,1), (3,2), (4,3) ]) + ); } #[tokio::test] @@ -1644,7 +1366,7 @@ mod tests { store .set( "zarr.json", - Bytes::copy_from_slice(br#"{"zarr_format":3, "node_type":"group"}"#), + 
Bytes::copy_from_slice(br#"{"zarr_format":3,"node_type":"group"}"#), ) .await?; assert_eq!( @@ -1652,13 +1374,13 @@ mod tests { Bytes::copy_from_slice(br#"{"zarr_format":3,"node_type":"group"}"#) ); - store.set("a/b/zarr.json", Bytes::copy_from_slice(br#"{"zarr_format":3, "node_type":"group", "attributes": {"spam":"ham", "eggs":42}}"#)).await?; + store.set("a/b/zarr.json", Bytes::copy_from_slice(br#"{"zarr_format":3,"node_type":"group","attributes":{"spam":"ham","eggs":42}}"#)).await?; assert_eq!( - store.get("a/b/zarr.json", &ByteRange::ALL).await.unwrap(), - Bytes::copy_from_slice( - br#"{"zarr_format":3,"node_type":"group","attributes":{"eggs":42,"spam":"ham"}}"# - ) - ); + store.get("a/b/zarr.json", &ByteRange::ALL).await.unwrap(), + Bytes::copy_from_slice( + br#"{"zarr_format":3,"node_type":"group","attributes":{"spam":"ham","eggs":42}}"# + ) + ); let zarr_meta = Bytes::copy_from_slice(br#"{"zarr_format":3,"node_type":"array","attributes":{"foo":42},"shape":[2,2,2],"data_type":"int32","chunk_grid":{"name":"regular","configuration":{"chunk_shape":[1,1,1]}},"chunk_key_encoding":{"name":"default","configuration":{"separator":"/"}},"fill_value":0,"codecs":[{"name":"mycodec","configuration":{"foo":42}}],"storage_transformers":[{"name":"mytransformer","configuration":{"bar":43}}],"dimension_names":["x","y","t"]}"#); store.set("a/b/array/zarr.json", zarr_meta.clone()).await?; @@ -2568,7 +2290,7 @@ mod tests { store .set( "zarr.json", - Bytes::copy_from_slice(br#"{"zarr_format":3, "node_type":"group"}"#), + Bytes::copy_from_slice(br#"{"zarr_format":3,"node_type":"group"}"#), ) .await .unwrap(); diff --git a/icechunk/src/strategies.rs b/icechunk/src/strategies.rs index 3719f0db..cd3d6555 100644 --- a/icechunk/src/strategies.rs +++ b/icechunk/src/strategies.rs @@ -6,12 +6,8 @@ use prop::string::string_regex; use proptest::prelude::*; use proptest::{collection::vec, option, strategy::Strategy}; -use crate::format::snapshot::ZarrArrayMetadata; +use crate::format::snapshot::{ArrayShape, DimensionName}; use crate::format::{ChunkIndices, Path}; -use crate::metadata::{ - ArrayShape, ChunkKeyEncoding, ChunkShape, Codec, DimensionNames, FillValue, - StorageTransformer, -}; use crate::session::Session; use crate::storage::new_in_memory_storage; use crate::Repository; @@ -62,25 +58,10 @@ prop_compose! { } } -pub fn codecs() -> impl Strategy> { - prop_oneof![Just(vec![Codec { name: "mycodec".to_string(), configuration: None }]),] -} - -pub fn storage_transformers() -> impl Strategy>> { - prop_oneof![ - Just(Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }])), - Just(None), - ] -} - #[derive(Debug)] pub struct ShapeDim { - shape: ArrayShape, - chunk_shape: ChunkShape, - dimension_names: Option, + pub shape: ArrayShape, + pub dimension_names: Option>, } pub fn shapes_and_dims(max_ndim: Option) -> impl Strategy { @@ -105,32 +86,37 @@ pub fn shapes_and_dims(max_ndim: Option) -> impl Strategy()), ndim))) }) .prop_map(|(shape, chunk_shape, dimension_names)| ShapeDim { - shape, - chunk_shape: ChunkShape(chunk_shape), - dimension_names, + #[allow(clippy::expect_used)] + shape: ArrayShape::new( + shape.into_iter().zip(chunk_shape.iter().map(|n| n.get())), + ) + .expect("Invalid array shape"), + dimension_names: dimension_names.map(|ds| { + ds.iter().map(|s| From::from(s.as_ref().map(|s| s.as_str()))).collect() + }), }) } -prop_compose! 
{ - pub fn zarr_array_metadata()( - chunk_key_encoding: ChunkKeyEncoding, - fill_value: FillValue, - shape_and_dim in shapes_and_dims(None), - storage_transformers in storage_transformers(), - codecs in codecs(), - ) -> ZarrArrayMetadata { - ZarrArrayMetadata { - shape: shape_and_dim.shape, - data_type: fill_value.get_data_type(), - chunk_shape: shape_and_dim.chunk_shape, - chunk_key_encoding, - fill_value, - codecs, - storage_transformers, - dimension_names: shape_and_dim.dimension_names, - } - } -} +//prop_compose! { +// pub fn zarr_array_metadata()( +// chunk_key_encoding: ChunkKeyEncoding, +// fill_value: FillValue, +// shape_and_dim in shapes_and_dims(None), +// storage_transformers in storage_transformers(), +// codecs in codecs(), +// ) -> ZarrArrayMetadata { +// ZarrArrayMetadata { +// shape: shape_and_dim.shape, +// data_type: fill_value.get_data_type(), +// chunk_shape: shape_and_dim.chunk_shape, +// chunk_key_encoding, +// fill_value, +// codecs, +// storage_transformers, +// dimension_names: shape_and_dim.dimension_names, +// } +// } +//} prop_compose! { pub fn chunk_indices(dim: usize, values_in: Range)(v in proptest::collection::vec(values_in, dim..(dim+1))) -> ChunkIndices { diff --git a/icechunk/tests/test_concurrency.rs b/icechunk/tests/test_concurrency.rs index 41418fd4..a153237d 100644 --- a/icechunk/tests/test_concurrency.rs +++ b/icechunk/tests/test_concurrency.rs @@ -1,10 +1,7 @@ #![allow(clippy::expect_used, clippy::unwrap_used)] use bytes::Bytes; use icechunk::{ - format::{snapshot::ZarrArrayMetadata, ByteRange, ChunkIndices, Path}, - metadata::{ - ChunkKeyEncoding, ChunkShape, Codec, DataType, FillValue, StorageTransformer, - }, + format::{snapshot::ArrayShape, ByteRange, ChunkIndices, Path}, session::{get_chunk, Session}, storage::new_in_memory_storage, Repository, Storage, @@ -13,7 +10,6 @@ use pretty_assertions::assert_eq; use rand::{rng, Rng}; use std::{ collections::{HashMap, HashSet}, - num::NonZeroU64, sync::Arc, time::Duration, }; @@ -39,26 +35,11 @@ async fn test_concurrency() -> Result<(), Box> { let repo = Repository::create(None, storage, HashMap::new()).await?; let mut ds = repo.writable_session("main").await?; - ds.add_group(Path::root()).await?; - let zarr_meta = ZarrArrayMetadata { - shape: vec![N as u64, N as u64], - data_type: DataType::Float64, - chunk_shape: ChunkShape(vec![ - NonZeroU64::new(1).expect("Cannot create NonZero"), - NonZeroU64::new(1).expect("Cannot create NonZero"), - ]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Float64(f64::NAN), - codecs: vec![Codec { name: "mycodec".to_string(), configuration: None }], - storage_transformers: Some(vec![StorageTransformer { - name: "mytransformer".to_string(), - configuration: None, - }]), - dimension_names: Some(vec![Some("x".to_string()), Some("y".to_string())]), - }; - + let shape = ArrayShape::new(vec![(N as u64, 1), (N as u64, 1)]).unwrap(); + let dimension_names = Some(vec!["x".into(), "y".into()]); + let user_data = Bytes::new(); let new_array_path: Path = "/array".try_into().unwrap(); - ds.add_array(new_array_path.clone(), zarr_meta.clone()).await?; + ds.add_array(new_array_path.clone(), shape, dimension_names, user_data).await?; let ds = Arc::new(RwLock::new(ds)); @@ -154,8 +135,7 @@ async fn list_task(ds: Arc>, barrier: Arc) { expected_indices.insert(ChunkIndices(vec![x, y])); } } - let expected_nodes: HashSet = - HashSet::from_iter(vec!["/".to_string(), "/array".to_string()]); + let expected_nodes: HashSet = 
HashSet::from_iter(vec!["/array".to_string()]); barrier.wait().await; loop { diff --git a/icechunk/tests/test_distributed_writes.rs b/icechunk/tests/test_distributed_writes.rs index e12f21d3..f5e25158 100644 --- a/icechunk/tests/test_distributed_writes.rs +++ b/icechunk/tests/test_distributed_writes.rs @@ -1,12 +1,11 @@ #![allow(clippy::unwrap_used)] use pretty_assertions::assert_eq; -use std::{collections::HashMap, num::NonZeroU64, ops::Range, sync::Arc}; +use std::{collections::HashMap, ops::Range, sync::Arc}; use bytes::Bytes; use icechunk::{ config::{S3Credentials, S3Options, S3StaticCredentials}, - format::{snapshot::ZarrArrayMetadata, ByteRange, ChunkIndices, Path, SnapshotId}, - metadata::{ChunkKeyEncoding, ChunkShape, DataType, FillValue}, + format::{snapshot::ArrayShape, ByteRange, ChunkIndices, Path, SnapshotId}, repository::VersionInfo, session::{get_chunk, Session}, storage::new_s3_storage, @@ -124,22 +123,11 @@ async fn test_distributed_writes() -> Result<(), Box Result<(), Box> { let mut ds = repo.writable_session("main").await?; - ds.add_group(Path::root()).await?; - let zarr_meta = ZarrArrayMetadata { - shape: vec![1100], - data_type: DataType::Int8, - chunk_shape: ChunkShape(vec![NonZeroU64::new(1).expect("Cannot create NonZero")]), - chunk_key_encoding: ChunkKeyEncoding::Slash, - fill_value: FillValue::Int8(0), - codecs: vec![], - storage_transformers: None, - dimension_names: None, - }; + let user_data = Bytes::new(); + ds.add_group(Path::root(), user_data.clone()).await?; + + let shape = ArrayShape::new(vec![(1100, 1)]).unwrap(); let array_path: Path = "/array".try_into().unwrap(); - ds.add_array(array_path.clone(), zarr_meta.clone()).await?; + ds.add_array(array_path.clone(), shape, None, user_data.clone()).await?; // we write more than 1k chunks to go beyond the chunk size for object listing and delete for idx in 0..1100 { let bytes = Bytes::copy_from_slice(&42i8.to_be_bytes()); @@ -200,56 +192,57 @@ async fn make_design_doc_repo( repo: &mut Repository, ) -> Result, Box> { let mut session = repo.writable_session("main").await?; - session.add_group(Path::root()).await?; + let user_data = Bytes::new(); + session.add_group(Path::root(), user_data.clone()).await?; session.commit("1", None).await?; let mut session = repo.writable_session("main").await?; - session.add_group(Path::try_from("/2").unwrap()).await?; + session.add_group(Path::try_from("/2").unwrap(), user_data.clone()).await?; let commit_2 = session.commit("2", None).await?; let mut session = repo.writable_session("main").await?; - session.add_group(Path::try_from("/4").unwrap()).await?; + session.add_group(Path::try_from("/4").unwrap(), user_data.clone()).await?; session.commit("4", None).await?; let mut session = repo.writable_session("main").await?; - session.add_group(Path::try_from("/5").unwrap()).await?; + session.add_group(Path::try_from("/5").unwrap(), user_data.clone()).await?; let snap_id = session.commit("5", None).await?; repo.create_tag("tag2", &snap_id).await?; repo.create_branch("develop", &commit_2).await?; let mut session = repo.writable_session("develop").await?; - session.add_group(Path::try_from("/3").unwrap()).await?; + session.add_group(Path::try_from("/3").unwrap(), user_data.clone()).await?; let snap_id = session.commit("3", None).await?; repo.create_tag("tag1", &snap_id).await?; let mut session = repo.writable_session("develop").await?; - session.add_group(Path::try_from("/6").unwrap()).await?; + session.add_group(Path::try_from("/6").unwrap(), user_data.clone()).await?; let commit_6 = 
session.commit("6", None).await?;
    repo.create_branch("test", &commit_6).await?;
    let mut session = repo.writable_session("test").await?;
-    session.add_group(Path::try_from("/7").unwrap()).await?;
+    session.add_group(Path::try_from("/7").unwrap(), user_data.clone()).await?;
    let commit_7 = session.commit("7", None).await?;
    let expire_older_than = Utc::now();
    repo.create_branch("qa", &commit_7).await?;
    let mut session = repo.writable_session("qa").await?;
-    session.add_group(Path::try_from("/8").unwrap()).await?;
+    session.add_group(Path::try_from("/8").unwrap(), user_data.clone()).await?;
    session.commit("8", None).await?;
    let mut session = repo.writable_session("main").await?;
-    session.add_group(Path::try_from("/12").unwrap()).await?;
+    session.add_group(Path::try_from("/12").unwrap(), user_data.clone()).await?;
    session.commit("12", None).await?;
    let mut session = repo.writable_session("main").await?;
-    session.add_group(Path::try_from("/13").unwrap()).await?;
+    session.add_group(Path::try_from("/13").unwrap(), user_data.clone()).await?;
    session.commit("13", None).await?;
    let mut session = repo.writable_session("main").await?;
-    session.add_group(Path::try_from("/14").unwrap()).await?;
+    session.add_group(Path::try_from("/14").unwrap(), user_data.clone()).await?;
    session.commit("14", None).await?;
    let mut session = repo.writable_session("develop").await?;
-    session.add_group(Path::try_from("/10").unwrap()).await?;
+    session.add_group(Path::try_from("/10").unwrap(), user_data.clone()).await?;
    session.commit("10", None).await?;
    let mut session = repo.writable_session("develop").await?;
-    session.add_group(Path::try_from("/11").unwrap()).await?;
+    session.add_group(Path::try_from("/11").unwrap(), user_data.clone()).await?;
    session.commit("11", None).await?;
    let mut session = repo.writable_session("test").await?;
-    session.add_group(Path::try_from("/9").unwrap()).await?;
+    session.add_group(Path::try_from("/9").unwrap(), user_data.clone()).await?;
    session.commit("9", None).await?;
    // let's double check the structure of the commit tree
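The hunks above migrate every `add_group`/`add_array` call to the new signatures, which take an explicit `user_data: Bytes` payload and, for arrays, an `ArrayShape` of `(array_length, chunk_length)` pairs plus optional dimension names. The following is an illustrative sketch only, not part of the patch; the helper name and the shape values are made up, and the signatures are assumed to be exactly those shown in the test hunks:

```rust
// Illustrative sketch -- not part of the patch. Assumes the Session, ArrayShape
// and Path items exactly as imported in the tests above.
use bytes::Bytes;
use icechunk::{
    format::{snapshot::ArrayShape, Path},
    session::Session,
};

async fn create_example_nodes(ds: &mut Session) -> Result<(), Box<dyn std::error::Error>> {
    // Node metadata now travels as opaque user_data bytes instead of ZarrArrayMetadata.
    let user_data = Bytes::new();
    ds.add_group(Path::root(), user_data.clone()).await?;

    // ArrayShape::new takes (array_length, chunk_length) pairs and returns None
    // when the shape is invalid, hence the ok_or here.
    let shape = ArrayShape::new(vec![(100, 10), (100, 10)]).ok_or("invalid shape")?;
    let dimension_names = Some(vec!["x".into(), "y".into()]);

    let array_path: Path = "/array".try_into().unwrap();
    ds.add_array(array_path, shape, dimension_names, user_data).await?;
    Ok(())
}
```

The `(array_length, chunk_length)` pairs replace the separate `shape` and `chunk_shape` fields of the removed `ZarrArrayMetadata` struct.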
diff --git a/icechunk/tests/test_virtual_refs.rs b/icechunk/tests/test_virtual_refs.rs
index 64034c6f..03cfd665 100644
--- a/icechunk/tests/test_virtual_refs.rs
+++ b/icechunk/tests/test_virtual_refs.rs
@@ -9,10 +9,9 @@ mod tests {
            Checksum, ChunkPayload, SecondsSinceEpoch, VirtualChunkLocation,
            VirtualChunkRef, VirtualReferenceErrorKind,
        },
-        snapshot::ZarrArrayMetadata,
+        snapshot::ArrayShape,
        ByteRange, ChunkId, ChunkIndices, Path,
    },
-    metadata::{ChunkKeyEncoding, ChunkShape, DataType, FillValue},
    repository::VersionInfo,
    session::{get_chunk, SessionErrorKind},
    storage::{
@@ -25,7 +24,6 @@ mod tests {
    use std::{
        collections::{HashMap, HashSet},
        error::Error,
-        num::NonZeroU64,
        path::PathBuf,
        vec,
    };
@@ -196,20 +194,9 @@ mod tests {
        let repo_dir = TempDir::new()?;
        let repo = create_local_repository(repo_dir.path()).await;
        let mut ds = repo.writable_session("main").await.unwrap();
-        let zarr_meta = ZarrArrayMetadata {
-            shape: vec![1, 1, 2],
-            data_type: DataType::Int32,
-            chunk_shape: ChunkShape(vec![
-                NonZeroU64::new(2).unwrap(),
-                NonZeroU64::new(2).unwrap(),
-                NonZeroU64::new(1).unwrap(),
-            ]),
-            chunk_key_encoding: ChunkKeyEncoding::Slash,
-            fill_value: FillValue::Int32(0),
-            codecs: vec![],
-            storage_transformers: None,
-            dimension_names: None,
-        };
+
+        let shape = ArrayShape::new(vec![(1, 1), (1, 1), (2, 1)]).unwrap();
+        let user_data = Bytes::new();
        let payload1 = ChunkPayload::Virtual(VirtualChunkRef {
            location: VirtualChunkLocation::from_absolute_path(&format!(
                // intentional extra '/'
@@ -231,7 +218,7 @@ mod tests {
        });
        let new_array_path: Path = "/array".try_into().unwrap();
-        ds.add_array(new_array_path.clone(), zarr_meta.clone()).await.unwrap();
+        ds.add_array(new_array_path.clone(), shape, None, user_data).await.unwrap();
        ds.set_chunk_ref(
            new_array_path.clone(),
@@ -313,20 +300,8 @@ mod tests {
        let repo = create_minio_repository().await;
        let mut ds = repo.writable_session("main").await.unwrap();
-        let zarr_meta = ZarrArrayMetadata {
-            shape: vec![1, 1, 2],
-            data_type: DataType::Int32,
-            chunk_shape: ChunkShape(vec![
-                NonZeroU64::new(2).unwrap(),
-                NonZeroU64::new(2).unwrap(),
-                NonZeroU64::new(1).unwrap(),
-            ]),
-            chunk_key_encoding: ChunkKeyEncoding::Slash,
-            fill_value: FillValue::Int32(0),
-            codecs: vec![],
-            storage_transformers: None,
-            dimension_names: None,
-        };
+        let shape = ArrayShape::new(vec![(1, 1), (1, 1), (2, 1)]).unwrap();
+        let user_data = Bytes::new();
        let payload1 = ChunkPayload::Virtual(VirtualChunkRef {
            location: VirtualChunkLocation::from_absolute_path(&format!(
                // intentional extra '/'
@@ -348,7 +323,7 @@ mod tests {
        });
        let new_array_path: Path = "/array".try_into().unwrap();
-        ds.add_array(new_array_path.clone(), zarr_meta.clone()).await.unwrap();
+        ds.add_array(new_array_path.clone(), shape, None, user_data).await.unwrap();
        ds.set_chunk_ref(
            new_array_path.clone(),