Fix tests by recreating the repo every time (#417)
* Fix tests by recreating the repo every time

* Add release note about icechunk 0.1.0a12

* Fix mypy error

---------

Co-authored-by: Tom Nicholas <tom@cworthy.org>
abarciauskas-bgse and TomNicholas authored Feb 4, 2025
1 parent 24a4582 commit 326440b
Showing 3 changed files with 40 additions and 50 deletions.
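
As the commit message describes, the fix moves repository creation into its own function-scoped fixture so every test starts from a freshly created icechunk repo. A minimal sketch of that fixture layout, condensed from the diff to virtualizarr/tests/test_writers/test_icechunk.py below (the `icechunk_storage` fixture body is collapsed in the diff and is assumed to return a local icechunk `Storage`):

```python
from typing import TYPE_CHECKING

import pytest

if TYPE_CHECKING:
    from icechunk import IcechunkStore, Repository, Storage


@pytest.fixture(scope="function")
def icechunk_repo(icechunk_storage: "Storage") -> "Repository":
    from icechunk import Repository

    # Recreate the repository for every test so commits made by one
    # test can never leak into another.
    return Repository.create(storage=icechunk_storage)


@pytest.fixture(scope="function")
def icechunk_filestore(icechunk_repo: "Repository") -> "IcechunkStore":
    # Convenience fixture: the store of a fresh writable session on "main".
    return icechunk_repo.writable_session("main").store
```
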
2 changes: 1 addition & 1 deletion ci/upstream.yml
@@ -29,6 +29,6 @@ dependencies:
- fsspec
- pip
- pip:
-- icechunk>=0.1.0a12 # Installs python-zarr v3 as dependency
+- icechunk>=0.1.0a12 # Installs zarr-python v3.0.0 as dependency
- git+https://github.com/fsspec/kerchunk.git@main
- imagecodecs-numcodecs==2024.6.1
3 changes: 2 additions & 1 deletion docs/releases.rst
@@ -13,6 +13,7 @@ New Features
(:pull:`309`) By `Tom Nicholas <https://github.com/TomNicholas>`_.
- Added a ``.nbytes`` accessor method which displays the bytes needed to hold the virtual references in memory.
(:issue:`167`, :pull:`227`) By `Tom Nicholas <https://github.com/TomNicholas>`_.
+- Upgrade icechunk dependency to ``>=0.1.0a12``. (:pull:`406`) By `Julia Signell <https://github.com/jsignell>`_.
- Sync with Icechunk v0.1.0a8 (:pull:`368`) By `Matthew Iannucci <https://github.com/mpiannucci>`. This also adds support
for the `to_icechunk` method to add timestamps as checksums when writing virtual references to an icechunk store. This
is useful for ensuring that virtual references are not stale when reading from an icechunk store, which can happen if the
@@ -40,7 +41,7 @@ Breaking changes
- The default backend for netCDF4 and HDF5 is now the custom ``HDFVirtualBackend`` replacing
the previous default which was a wrapper around the kerchunk backend.
(:issue:`374`, :pull:`395`) By `Julia Signell <https://github.com/jsignell>`_.
-- Optional dependency on kerchunk is now the newly-released v0.2.8. This release of kerchunk is compatible with zarr-python v3,
+- Optional dependency on kerchunk is now the newly-released v0.2.8. This release of kerchunk is compatible with zarr-python v3.0.0,
which means a released version of kerchunk can now be used with both VirtualiZarr and Icechunk.
(:issue:`392`, :pull:`406`, :pull:`412``) By `Julia Signell <https://github.com/jsignell>`_ and `Tom Nicholas <https://github.com/TomNicholas>`_.
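
The release note above on syncing with Icechunk v0.1.0a8 says `to_icechunk` can attach timestamps as checksums so stale virtual references can be detected. A rough sketch of what that usage might look like: `Repository.create`, `writable_session`, and `to_icechunk` appear in the diff below, while the storage constructor and the `last_updated_at` keyword are assumptions here, not confirmed by this commit.

```python
from datetime import datetime, timezone

import icechunk
from virtualizarr import open_virtual_dataset

# Build a virtual dataset from an existing netCDF file (path is illustrative).
vds = open_virtual_dataset("data.nc")

# Assumed storage constructor; use whatever Storage your icechunk version provides.
storage = icechunk.local_filesystem_storage("/tmp/example-repo")
repo = icechunk.Repository.create(storage=storage)

# Write the virtual references and stamp them with a "last updated" time,
# so later reads can detect whether the underlying files changed afterwards.
# NOTE: the keyword name `last_updated_at` is an assumption; check the
# VirtualiZarr docs for the exact signature.
session = repo.writable_session("main")
vds.virtualize.to_icechunk(session.store, last_updated_at=datetime.now(timezone.utc))
session.commit("write virtual refs with timestamp checksums")
```
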

85 changes: 37 additions & 48 deletions virtualizarr/tests/test_writers/test_icechunk.py
@@ -20,7 +20,11 @@
from virtualizarr.zarr import ZArray

if TYPE_CHECKING:
-from icechunk import IcechunkStore, Storage # type: ignore[import-not-found]
+from icechunk import ( # type: ignore[import-not-found]
+IcechunkStore,
+Repository,
+Storage,
+)


@pytest.fixture(scope="function")
@@ -31,11 +35,16 @@ def icechunk_storage(tmp_path: Path) -> "Storage":


@pytest.fixture(scope="function")
-def icechunk_filestore(icechunk_storage: "Storage") -> "IcechunkStore":
+def icechunk_repo(icechunk_storage: "Storage") -> "Repository":
from icechunk import Repository

repo = Repository.create(storage=icechunk_storage)
-session = repo.writable_session("main")
+return repo
+
+
+@pytest.fixture(scope="function")
+def icechunk_filestore(icechunk_repo: "Repository") -> "IcechunkStore":
+session = icechunk_repo.writable_session("main")
return session.store


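The tests below all drive the `icechunk_repo` fixture through the same session lifecycle: open a writable session, write the virtual references, commit, then open a fresh session to append and a read-only session to verify. A condensed, illustrative sketch of that flow (it leans on the module's `gen_virtual_dataset` helper and `simple_netcdf4` fixture; the test name and assertion are only indicative):

```python
import xarray as xr


def test_write_append_then_read(icechunk_repo, simple_netcdf4):
    vds = gen_virtual_dataset(file_uri=simple_netcdf4)

    # One writable session per commit: write the virtual refs, then commit.
    session = icechunk_repo.writable_session("main")
    vds.virtualize.to_icechunk(session.store)
    session.commit("first commit")

    # A new writable session is opened to append after the first commit.
    append_session = icechunk_repo.writable_session("main")
    vds.virtualize.to_icechunk(append_session.store, append_dim="x")
    append_session.commit("appended data")

    # Read back through a read-only session on the same branch.
    read_session = icechunk_repo.readonly_session(branch="main")
    with xr.open_zarr(read_session.store, consolidated=False, zarr_format=3) as ds:
        assert ds.sizes["x"] == 2 * vds.sizes["x"]
```
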
@@ -537,44 +546,42 @@ class TestAppend:
# Success cases
## When appending to a single virtual ref without encoding, it succeeds
def test_append_virtual_ref_without_encoding(
-self, icechunk_storage: "Storage", simple_netcdf4: str
+self, icechunk_repo: "Repository", simple_netcdf4: str
):
import xarray.testing as xrt
-from icechunk import Repository

# generate virtual dataset
vds = gen_virtual_dataset(file_uri=simple_netcdf4)
-# create the icechunk store and commit the first virtual dataset
-repo = Repository.create(storage=icechunk_storage)
-session = repo.writable_session("main")
-vds.virtualize.to_icechunk(session.store)
-session.commit(
+# Commit the first virtual dataset
+writable_session = icechunk_repo.writable_session("main")
+vds.virtualize.to_icechunk(writable_session.store)
+writable_session.commit(
"test commit"
) # need to commit it in order to append to it in the next lines
+append_session = icechunk_repo.writable_session("main")

# Append the same dataset to the same store
-icechunk_filestore_append = repo.writable_session("main")
-vds.virtualize.to_icechunk(icechunk_filestore_append.store, append_dim="x")
-icechunk_filestore_append.commit("appended data")
+vds.virtualize.to_icechunk(append_session.store, append_dim="x")
+append_session.commit("appended data")

-icechunk_filestore_append = repo.writable_session("main")
-vds.virtualize.to_icechunk(icechunk_filestore_append.store, append_dim="x")
-icechunk_filestore_append.commit("appended data again")
+second_append_session = icechunk_repo.writable_session("main")
+vds.virtualize.to_icechunk(second_append_session.store, append_dim="x")
+second_append_session.commit("appended data again")

+read_session = icechunk_repo.readonly_session(branch="main")
with (
xr.open_zarr(
-icechunk_filestore_append.store, consolidated=False, zarr_format=3
+read_session.store, consolidated=False, zarr_format=3
) as array,
xr.open_dataset(simple_netcdf4) as expected_ds,
):
expected_array = xr.concat([expected_ds, expected_ds, expected_ds], dim="x")
xrt.assert_identical(array, expected_array)

def test_append_virtual_ref_with_encoding(
-self, icechunk_storage: "Storage", netcdf4_files_factory: Callable
+self, icechunk_repo: "Repository", netcdf4_files_factory: Callable
):
import xarray.testing as xrt
-from icechunk import Repository

scale_factor = 0.01
encoding = {"air": {"scale_factor": scale_factor}}
@@ -605,8 +612,7 @@ def test_append_virtual_ref_with_encoding(
),
)

-# create the icechunk store and commit the first virtual dataset
-icechunk_repo = Repository.create(storage=icechunk_storage)
+# Commit the first virtual dataset
icechunk_filestore = icechunk_repo.writable_session("main")
vds1.virtualize.to_icechunk(icechunk_filestore.store)
icechunk_filestore.commit(
@@ -633,10 +639,9 @@ def test_append_virtual_ref_with_encoding(
## When appending to a virtual ref with encoding, it succeeds
@pytest.mark.asyncio
async def test_append_with_multiple_root_arrays(
-self, icechunk_storage: "Storage", netcdf4_files_factory: Callable
+self, icechunk_repo: "Repository", netcdf4_files_factory: Callable
):
import xarray.testing as xrt
-from icechunk import Repository
from zarr.core.buffer import default_buffer_prototype

filepath1, filepath2 = netcdf4_files_factory(
@@ -713,8 +718,6 @@ async def test_append_with_multiple_root_arrays(
),
)

-# create the icechunk store and commit the first virtual dataset
-icechunk_repo = Repository.create(storage=icechunk_storage)
icechunk_filestore = icechunk_repo.writable_session("main")
vds1.virtualize.to_icechunk(icechunk_filestore.store)
icechunk_filestore.commit(
@@ -748,12 +751,11 @@ async def test_append_with_multiple_root_arrays(
@pytest.mark.parametrize("zarr_format", [2, 3])
def test_append_with_compression_succeeds(
self,
icechunk_storage: "Storage",
icechunk_repo: "Repository",
netcdf4_files_factory: Callable,
zarr_format: Literal[2, 3],
):
import xarray.testing as xrt
-from icechunk import Repository

encoding = {
"air": {
@@ -792,8 +794,7 @@ def test_append_with_compression_succeeds(
),
)

-# Create icechunk store and commit the compressed dataset
-icechunk_repo = Repository.create(storage=icechunk_storage)
+# Commit the compressed dataset
icechunk_filestore = icechunk_repo.writable_session("main")
vds1.virtualize.to_icechunk(icechunk_filestore.store)
icechunk_filestore.commit("test commit")
@@ -815,15 +816,12 @@

## When chunk shapes are different it fails
def test_append_with_different_chunking_fails(
-self, icechunk_storage: "Storage", simple_netcdf4: str
+self, icechunk_repo: "Repository", simple_netcdf4: str
):
-from icechunk import Repository
-
# Generate a virtual dataset with specific chunking
vds = gen_virtual_dataset(file_uri=simple_netcdf4, chunk_shape=(3, 4))

-# Create icechunk store and commit the dataset
-icechunk_repo = Repository.create(storage=icechunk_storage)
+# Commit the dataset
icechunk_filestore = icechunk_repo.writable_session("main")
vds.virtualize.to_icechunk(icechunk_filestore.store)
icechunk_filestore.commit("test commit")
@@ -842,10 +840,8 @@ def test_append_with_different_chunking_fails(

## When encoding is different it fails
def test_append_with_different_encoding_fails(
-self, icechunk_storage: "Storage", simple_netcdf4: str
+self, icechunk_repo: "Repository", simple_netcdf4: str
):
-from icechunk import Repository
-
# Generate datasets with different encoding
vds1 = gen_virtual_dataset(
file_uri=simple_netcdf4, encoding={"scale_factor": 0.1}
@@ -854,8 +850,7 @@ def test_append_with_different_encoding_fails(
file_uri=simple_netcdf4, encoding={"scale_factor": 0.01}
)

-# Create icechunk store and commit the first dataset
-icechunk_repo = Repository.create(storage=icechunk_storage)
+# Commit the first dataset
icechunk_filestore = icechunk_repo.writable_session("main")
vds1.virtualize.to_icechunk(icechunk_filestore.store)
icechunk_filestore.commit("test commit")
@@ -869,10 +864,8 @@ def test_append_with_different_encoding_fails(
vds2.virtualize.to_icechunk(icechunk_filestore_append.store, append_dim="x")

def test_dimensions_do_not_align(
-self, icechunk_storage: "Storage", simple_netcdf4: str
+self, icechunk_repo: "Repository", simple_netcdf4: str
):
-from icechunk import Repository
-
# Generate datasets with different lengths on the non-append dimension (x)
vds1 = gen_virtual_dataset(
# {'x': 5, 'y': 4}
@@ -885,8 +878,7 @@ def test_dimensions_do_not_align(
shape=(6, 4),
)

-# Create icechunk store and commit the first dataset
-icechunk_repo = Repository.create(storage=icechunk_storage)
+# Commit the first dataset
icechunk_filestore = icechunk_repo.writable_session("main")
vds1.virtualize.to_icechunk(icechunk_filestore.store)
icechunk_filestore.commit("test commit")
@@ -897,18 +889,15 @@ def test_dimensions_do_not_align(
vds2.virtualize.to_icechunk(icechunk_filestore_append.store, append_dim="y")

def test_append_dim_not_in_dims_raises_error(
-self, icechunk_storage: "Storage", simple_netcdf4: str
+self, icechunk_repo: "Repository", simple_netcdf4: str
):
"""
Test that attempting to append with an append_dim not present in dims raises a ValueError.
"""
-from icechunk import Repository
-
vds = gen_virtual_dataset(
file_uri=simple_netcdf4, shape=(5, 4), chunk_shape=(5, 4), dims=["x", "y"]
)

-icechunk_repo = Repository.create(storage=icechunk_storage)
icechunk_filestore = icechunk_repo.writable_session("main")
vds.virtualize.to_icechunk(icechunk_filestore.store)
icechunk_filestore.commit("initial commit")