Fix most integration tests and writer tests
abarciauskas-bgse committed Feb 9, 2025
1 parent 4a8bfdd commit d05cec3
Showing 9 changed files with 230 additions and 242 deletions.
117 changes: 111 additions & 6 deletions conftest.py
@@ -1,4 +1,5 @@
import itertools
from itertools import product
from pathlib import Path
from typing import Any, Callable, Mapping, Optional

@@ -191,7 +192,7 @@ def _create_manifestarray(
{"name": "numcodecs.zlib", "configuration": {"level": 1}}
],
):
metadata = array_v3_metadata(shape, chunks, codecs)
metadata = array_v3_metadata(shape=shape, chunks=chunks, codecs=codecs)
chunk_grid_shape = tuple(
ceildiv(axis_length, chunk_length)
for axis_length, chunk_length in zip(shape, chunks)
@@ -218,16 +219,16 @@ def array_v3_metadata():
def _create_metadata(
shape: tuple,
chunks: tuple,
codecs: list[dict] = [
{"name": "numcodecs.zlib", "configuration": {"level": 1}}
],
data_type: str = "int32",
codecs: list[dict] | None = None,
fill_value: int = None,
):
return ArrayV3Metadata(
shape=shape,
data_type="int32",
data_type=data_type,
chunk_grid={"name": "regular", "configuration": {"chunk_shape": chunks}},
chunk_key_encoding={"name": "default"},
fill_value=0,
fill_value=fill_value,
codecs=convert_to_codec_pipeline(
codecs=codecs,
dtype=np.dtype("int32"),
@@ -266,3 +267,107 @@ def _create_metadata_dict(
}

return _create_metadata_dict


def generate_chunk_manifest(
netcdf4_file: str,
shape: tuple[int, ...],
chunks: tuple[int, ...],
offset=6144,
length=48,
) -> ChunkManifest:
chunk_dict = {}
num_chunks = [shape[i] // chunks[i] for i in range(len(shape))]
offset = offset

# Generate all possible chunk indices using Cartesian product
for chunk_indices in product(*[range(n) for n in num_chunks]):
chunk_index = ".".join(map(str, chunk_indices))
chunk_dict[chunk_index] = {
"path": netcdf4_file,
"offset": offset,
"length": length,
}
offset += length # Increase offset for each chunk

return ChunkManifest(chunk_dict)


@pytest.fixture
def gen_virtual_variable(array_v3_metadata: Callable) -> Callable:
def _gen_virtual_variable(
file_uri: str,
shape: tuple[int, ...] = (3, 4),
chunk_shape: tuple[int, ...] = (3, 4),
dtype: np.dtype = np.dtype("int32"),
codecs: Optional[list[dict[Any, Any]]] = None,
fill_value: Optional[str] = None,
encoding: Optional[dict] = None,
offset: int = 6144,
length: int = 48,
dims: list[str] = [],
attrs: dict[str, Any] = {},
) -> xr.Variable:
manifest = generate_chunk_manifest(
file_uri,
shape=shape,
chunks=chunk_shape,
offset=offset,
length=length,
)
metadata = array_v3_metadata(
shape=shape,
chunks=chunk_shape,
codecs=codecs,
data_type=dtype,
fill_value=fill_value,
)
ma = ManifestArray(chunkmanifest=manifest, metadata=metadata)
return xr.Variable(
data=ma,
dims=dims,
encoding=encoding,
attrs=attrs,
)

return _gen_virtual_variable


@pytest.fixture
def gen_virtual_dataset(gen_virtual_variable: Callable) -> Callable:
def _gen_virtual_dataset(
file_uri: str,
shape: tuple[int, ...] = (3, 4),
chunk_shape: tuple[int, ...] = (3, 4),
dtype: np.dtype = np.dtype("int32"),
codecs: Optional[list[dict[Any, Any]]] = None,
fill_value: Optional[str] = None,
encoding: Optional[dict] = None,
variable_name: str = "foo",
offset: int = 6144,
length: int = 48,
dims: Optional[list[str]] = None,
coords: Optional[xr.Coordinates] = None,
) -> xr.Dataset:
with xr.open_dataset(file_uri) as ds:
var = gen_virtual_variable(
file_uri=file_uri,
shape=shape,
chunk_shape=chunk_shape,
dtype=dtype,
codecs=codecs,
fill_value=fill_value,
encoding=encoding,
offset=offset,
length=length,
dims=dims or [str(name) for name in ds.dims],
attrs=ds[variable_name].attrs,
)

return xr.Dataset(
{variable_name: var},
coords=coords,
attrs=ds.attrs,
)

return _gen_virtual_dataset
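
For orientation, here is a minimal sketch of how a test might consume the gen_virtual_dataset fixture added above. The file path "example.nc" and the variable name "foo" are placeholders rather than part of this commit; the fixture opens the file with xarray to copy its dims and attrs, so a real test needs a file that exists on disk and contains that variable.

from virtualizarr.manifests import ManifestArray


def test_gen_virtual_dataset_sketch(gen_virtual_dataset):
    # Build a virtual dataset whose "foo" variable wraps a ManifestArray with a
    # single chunk at the fixture's default byte range (offset=6144, length=48).
    vds = gen_virtual_dataset(file_uri="example.nc", variable_name="foo")

    marr = vds["foo"].data
    assert isinstance(marr, ManifestArray)
    assert marr.shape == (3, 4)  # the fixture's default shape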
30 changes: 9 additions & 21 deletions virtualizarr/tests/test_integration.py
@@ -18,26 +18,17 @@
from virtualizarr.translators.kerchunk import (
dataset_from_kerchunk_refs,
)
from virtualizarr.zarr import ZArray


def test_kerchunk_roundtrip_in_memory_no_concat():
def test_kerchunk_roundtrip_in_memory_no_concat(array_v3_metadata):
# Set up example xarray dataset
chunks_dict = {
"0.0": {"path": "/foo.nc", "offset": 100, "length": 100},
"0.1": {"path": "/foo.nc", "offset": 200, "length": 100},
}
manifest = ChunkManifest(entries=chunks_dict)
marr = ManifestArray(
zarray=dict(
shape=(2, 4),
dtype=np.dtype("<i8"),
chunks=(2, 2),
compressor=None,
filters=None,
fill_value=None,
order="C",
),
metadata=array_v3_metadata(shape=(2, 4), chunks=(2, 4)),
chunkmanifest=manifest,
)
vds = xr.Dataset({"a": (["x", "y"], marr)})
@@ -230,24 +221,21 @@ def test_non_dimension_coordinates(self, tmpdir, roundtrip_func, hdf_backend):
for coord in ds.coords:
assert ds.coords[coord].attrs == roundtrip.coords[coord].attrs

def test_datetime64_dtype_fill_value(self, tmpdir, roundtrip_func):
def test_datetime64_dtype_fill_value(
self, tmpdir, roundtrip_func, array_v3_metadata
):
chunks_dict = {
"0.0.0": {"path": "/foo.nc", "offset": 100, "length": 100},
}
manifest = ChunkManifest(entries=chunks_dict)
chunks = (1, 1, 1)
shape = (1, 1, 1)
zarray = ZArray(
chunks=chunks,
compressor={"id": "zlib", "level": 1},
dtype=np.dtype("<M8[ns]"),
# fill_value=0.0,
filters=None,
order="C",
metadata = array_v3_metadata(
shape=shape,
zarr_format=2,
chunks=chunks,
codecs=[{"name": "numcodecs.zlib", "configuration": {"level": 1}}],
)
marr1 = ManifestArray(zarray=zarray, chunkmanifest=manifest)
marr1 = ManifestArray(metadata=metadata, chunkmanifest=manifest)
vds = xr.Dataset(
{
"a": xr.DataArray(
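
The change above follows the same pattern as the rest of this commit: the removed ZArray construction (numcodecs-style compressor/filters/dtype kwargs) is replaced by Zarr v3 metadata built through the array_v3_metadata fixture. Below is a rough sketch of that mapping with illustrative values, not ones taken from any single test in the diff.

from virtualizarr.manifests import ChunkManifest, ManifestArray


def test_zarray_to_v3_metadata_sketch(array_v3_metadata):
    manifest = ChunkManifest(
        entries={"0.0": {"path": "/foo.nc", "offset": 100, "length": 100}}
    )
    # dtype "<i8" becomes the v3 data_type string "int64", and the old
    # compressor={"id": "zlib", "level": 1} becomes an entry in the codecs list.
    metadata = array_v3_metadata(
        shape=(2, 2),
        chunks=(2, 2),
        data_type="int64",
        codecs=[{"name": "numcodecs.zlib", "configuration": {"level": 1}}],
    )
    marr = ManifestArray(metadata=metadata, chunkmanifest=manifest)
    assert marr.shape == (2, 2)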
6 changes: 5 additions & 1 deletion virtualizarr/tests/test_manifests/test_array.py
@@ -298,7 +298,11 @@ def test_stack_empty(self, array_v3_metadata):
# both manifest arrays in this example have the same metadata properties
chunks = (5, 10)
shape = (5, 20)
metadata = array_v3_metadata(shape=shape, chunks=chunks)
metadata = array_v3_metadata(
shape=shape,
chunks=chunks,
codecs=[{"name": "numcodecs.zlib", "configuration": {"level": 1}}],
)

chunks_dict1 = {}
manifest1 = ChunkManifest(entries=chunks_dict1, shape=(1, 2))
11 changes: 4 additions & 7 deletions virtualizarr/tests/test_writers/conftest.py
@@ -7,20 +7,17 @@


@pytest.fixture
def vds_with_manifest_arrays() -> Dataset:
def vds_with_manifest_arrays(array_v3_metadata) -> Dataset:
arr = ManifestArray(
chunkmanifest=ChunkManifest(
entries={"0.0": dict(path="/test.nc", offset=6144, length=48)}
),
zarray=dict(
metadata=array_v3_metadata(
shape=(2, 3),
dtype=np.dtype("<i8"),
data_type=np.dtype("<i8"),
chunks=(2, 3),
compressor={"id": "zlib", "level": 1},
filters=None,
codecs=[{"name": "numcodecs.zlib", "configuration": {"level": 1}}],
fill_value=0,
order="C",
zarr_format=3,
),
)
var = Variable(dims=["x", "y"], data=arr, attrs={"units": "km"})
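
A minimal sketch of how a writer test might inspect this fixture. The variable name "a" is an assumption here, since the line that assembles the Dataset sits outside the hunk shown above.

from virtualizarr.manifests import ManifestArray


def test_vds_with_manifest_arrays_sketch(vds_with_manifest_arrays):
    marr = vds_with_manifest_arrays["a"].data  # "a" assumed to be the variable name
    assert isinstance(marr, ManifestArray)
    assert marr.metadata.zarr_format == 3
    assert vds_with_manifest_arrays["a"].attrs == {"units": "km"}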