Skip to content

Commit

Permalink
Fix tests by recreating the repo every time
Browse files Browse the repository at this point in the history
  • Loading branch information
abarciauskas-bgse committed Feb 3, 2025
1 parent 14f58ba commit b4c205e
Showing 1 changed file with 38 additions and 48 deletions.
86 changes: 38 additions & 48 deletions virtualizarr/tests/test_writers/test_icechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@
from virtualizarr.zarr import ZArray

if TYPE_CHECKING:
from icechunk import IcechunkStore, Storage # type: ignore[import-not-found]
from icechunk import ( # type: ignore[import-not-found]
IcechunkStore,
Repository,
Session,
Storage,
)


@pytest.fixture(scope="function")
Expand All @@ -31,11 +36,16 @@ def icechunk_storage(tmp_path: Path) -> "Storage":


@pytest.fixture(scope="function")
def icechunk_filestore(icechunk_storage: "Storage") -> "IcechunkStore":
def icechunk_repo(icechunk_storage: "Storage") -> "Repository":
    """Create a new icechunk repository backed by ``icechunk_storage``.

    Returns the repository itself, not a session: tests and dependent
    fixtures open their own writable or read-only sessions on it as needed.
    """
    # Imported lazily; at module level icechunk is only imported for typing.
    from icechunk import Repository

    return Repository.create(storage=icechunk_storage)


@pytest.fixture(scope="function")
def icechunk_filestore(icechunk_repo: "Repository") -> "IcechunkStore":
    """Return the store of a writable session opened on the ``main`` branch."""
    return icechunk_repo.writable_session("main").store


Expand Down Expand Up @@ -537,44 +547,42 @@ class TestAppend:
# Success cases
## When appending to a single virtual ref without encoding, it succeeds
def test_append_virtual_ref_without_encoding(
self, icechunk_storage: "Storage", simple_netcdf4: str
self, icechunk_repo: "Repository", simple_netcdf4: str
):
import xarray.testing as xrt
from icechunk import Repository

# generate virtual dataset
vds = gen_virtual_dataset(file_uri=simple_netcdf4)
# create the icechunk store and commit the first virtual dataset
repo = Repository.create(storage=icechunk_storage)
session = repo.writable_session("main")
vds.virtualize.to_icechunk(session.store)
session.commit(
# Commit the first virtual dataset
writable_session = icechunk_repo.writable_session("main")
vds.virtualize.to_icechunk(writable_session.store)
writable_session.commit(
"test commit"
) # need to commit it in order to append to it in the next lines
append_session = icechunk_repo.writable_session("main")

# Append the same dataset to the same store
icechunk_filestore_append = repo.writable_session("main")
vds.virtualize.to_icechunk(icechunk_filestore_append.store, append_dim="x")
icechunk_filestore_append.commit("appended data")
vds.virtualize.to_icechunk(append_session.store, append_dim="x")
append_session.commit("appended data")

icechunk_filestore_append = repo.writable_session("main")
vds.virtualize.to_icechunk(icechunk_filestore_append.store, append_dim="x")
icechunk_filestore_append.commit("appended data again")
second_append_session = icechunk_repo.writable_session("main")
vds.virtualize.to_icechunk(second_append_session.store, append_dim="x")
second_append_session.commit("appended data again")

read_session = icechunk_repo.readonly_session(branch="main")
with (
xr.open_zarr(
icechunk_filestore_append.store, consolidated=False, zarr_format=3
read_session.store, consolidated=False, zarr_format=3
) as array,
xr.open_dataset(simple_netcdf4) as expected_ds,
):
expected_array = xr.concat([expected_ds, expected_ds, expected_ds], dim="x")
xrt.assert_identical(array, expected_array)

def test_append_virtual_ref_with_encoding(
self, icechunk_storage: "Storage", netcdf4_files_factory: Callable
self, icechunk_repo: "Repository", netcdf4_files_factory: Callable
):
import xarray.testing as xrt
from icechunk import Repository

scale_factor = 0.01
encoding = {"air": {"scale_factor": scale_factor}}
Expand Down Expand Up @@ -605,8 +613,7 @@ def test_append_virtual_ref_with_encoding(
),
)

# create the icechunk store and commit the first virtual dataset
icechunk_repo = Repository.create(storage=icechunk_storage)
# Commit the first virtual dataset
icechunk_filestore = icechunk_repo.writable_session("main")
vds1.virtualize.to_icechunk(icechunk_filestore.store)
icechunk_filestore.commit(
Expand All @@ -633,10 +640,9 @@ def test_append_virtual_ref_with_encoding(
## When appending to a store with multiple root arrays, it succeeds
@pytest.mark.asyncio
async def test_append_with_multiple_root_arrays(
self, icechunk_storage: "Storage", netcdf4_files_factory: Callable
self, icechunk_repo: "Repository", netcdf4_files_factory: Callable
):
import xarray.testing as xrt
from icechunk import Repository
from zarr.core.buffer import default_buffer_prototype

filepath1, filepath2 = netcdf4_files_factory(
Expand Down Expand Up @@ -713,8 +719,6 @@ async def test_append_with_multiple_root_arrays(
),
)

# create the icechunk store and commit the first virtual dataset
icechunk_repo = Repository.create(storage=icechunk_storage)
icechunk_filestore = icechunk_repo.writable_session("main")
vds1.virtualize.to_icechunk(icechunk_filestore.store)
icechunk_filestore.commit(
Expand Down Expand Up @@ -748,12 +752,11 @@ async def test_append_with_multiple_root_arrays(
@pytest.mark.parametrize("zarr_format", [2, 3])
def test_append_with_compression_succeeds(
self,
icechunk_storage: "Storage",
icechunk_repo: "Repository",
netcdf4_files_factory: Callable,
zarr_format: Literal[2, 3],
):
import xarray.testing as xrt
from icechunk import Repository

encoding = {
"air": {
Expand Down Expand Up @@ -792,8 +795,7 @@ def test_append_with_compression_succeeds(
),
)

# Create icechunk store and commit the compressed dataset
icechunk_repo = Repository.create(storage=icechunk_storage)
# Commit the compressed dataset
icechunk_filestore = icechunk_repo.writable_session("main")
vds1.virtualize.to_icechunk(icechunk_filestore.store)
icechunk_filestore.commit("test commit")
Expand All @@ -815,15 +817,12 @@ def test_append_with_compression_succeeds(

## When chunk shapes are different it fails
def test_append_with_different_chunking_fails(
self, icechunk_storage: "Storage", simple_netcdf4: str
self, icechunk_repo: "Repository", simple_netcdf4: str
):
from icechunk import Repository

# Generate a virtual dataset with specific chunking
vds = gen_virtual_dataset(file_uri=simple_netcdf4, chunk_shape=(3, 4))

# Create icechunk store and commit the dataset
icechunk_repo = Repository.create(storage=icechunk_storage)
# Commit the dataset
icechunk_filestore = icechunk_repo.writable_session("main")
vds.virtualize.to_icechunk(icechunk_filestore.store)
icechunk_filestore.commit("test commit")
Expand All @@ -842,10 +841,8 @@ def test_append_with_different_chunking_fails(

## When encoding is different it fails
def test_append_with_different_encoding_fails(
self, icechunk_storage: "Storage", simple_netcdf4: str
self, icechunk_repo: "Repository", simple_netcdf4: str
):
from icechunk import Repository

# Generate datasets with different encoding
vds1 = gen_virtual_dataset(
file_uri=simple_netcdf4, encoding={"scale_factor": 0.1}
Expand All @@ -854,8 +851,7 @@ def test_append_with_different_encoding_fails(
file_uri=simple_netcdf4, encoding={"scale_factor": 0.01}
)

# Create icechunk store and commit the first dataset
icechunk_repo = Repository.create(storage=icechunk_storage)
# Commit the first dataset
icechunk_filestore = icechunk_repo.writable_session("main")
vds1.virtualize.to_icechunk(icechunk_filestore.store)
icechunk_filestore.commit("test commit")
Expand All @@ -869,10 +865,8 @@ def test_append_with_different_encoding_fails(
vds2.virtualize.to_icechunk(icechunk_filestore_append.store, append_dim="x")

def test_dimensions_do_not_align(
self, icechunk_storage: "Storage", simple_netcdf4: str
self, icechunk_repo: "Repository", simple_netcdf4: str
):
from icechunk import Repository

# Generate datasets with different lengths on the non-append dimension (x)
vds1 = gen_virtual_dataset(
# {'x': 5, 'y': 4}
Expand All @@ -885,8 +879,7 @@ def test_dimensions_do_not_align(
shape=(6, 4),
)

# Create icechunk store and commit the first dataset
icechunk_repo = Repository.create(storage=icechunk_storage)
# Commit the first dataset
icechunk_filestore = icechunk_repo.writable_session("main")
vds1.virtualize.to_icechunk(icechunk_filestore.store)
icechunk_filestore.commit("test commit")
Expand All @@ -897,18 +890,15 @@ def test_dimensions_do_not_align(
vds2.virtualize.to_icechunk(icechunk_filestore_append.store, append_dim="y")

def test_append_dim_not_in_dims_raises_error(
self, icechunk_storage: "Storage", simple_netcdf4: str
self, icechunk_repo: "Repository", simple_netcdf4: str
):
"""
Test that attempting to append with an append_dim not present in dims raises a ValueError.
"""
from icechunk import Repository

vds = gen_virtual_dataset(
file_uri=simple_netcdf4, shape=(5, 4), chunk_shape=(5, 4), dims=["x", "y"]
)

icechunk_repo = Repository.create(storage=icechunk_storage)
icechunk_filestore = icechunk_repo.writable_session("main")
vds.virtualize.to_icechunk(icechunk_filestore.store)
icechunk_filestore.commit("initial commit")
Expand Down

0 comments on commit b4c205e

Please sign in to comment.