From f5a74beba260bf3d8ffa7ce2c6483803c2094710 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 17 Oct 2024 00:05:27 -0400 Subject: [PATCH 01/13] try making ujson an optional dep --- pyproject.toml | 7 ++++--- virtualizarr/accessor.py | 3 ++- virtualizarr/manifests/manifest.py | 3 ++- virtualizarr/readers/kerchunk.py | 3 ++- virtualizarr/tests/__init__.py | 1 + virtualizarr/tests/test_backend.py | 3 ++- virtualizarr/tests/test_kerchunk.py | 3 +++ virtualizarr/tests/test_readers/test_kerchunk.py | 6 +++++- virtualizarr/tests/test_writers/test_kerchunk.py | 5 ++++- virtualizarr/writers/kerchunk.py | 3 ++- virtualizarr/zarr.py | 3 ++- 11 files changed, 29 insertions(+), 11 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5af632ce..73da10ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,12 +22,10 @@ requires-python = ">=3.10" dynamic = ["version"] dependencies = [ "xarray>=2024.06.0", - "kerchunk>=0.2.5", - "h5netcdf", "numpy>=2.0.0", - "ujson", "packaging", "universal-pathlib", + "numcodecs" ] [project.optional-dependencies] @@ -35,7 +33,9 @@ test = [ "codecov", "fastparquet", "fsspec", + "h5netcdf", "h5py", + "kerchunk>=0.2.5", "mypy", "netcdf4", "pandas-stubs", @@ -47,6 +47,7 @@ test = [ "ruff", "s3fs", "scipy", + "ujson", ] diff --git a/virtualizarr/accessor.py b/virtualizarr/accessor.py index 0a97237e..cc251e63 100644 --- a/virtualizarr/accessor.py +++ b/virtualizarr/accessor.py @@ -5,7 +5,6 @@ overload, ) -import ujson # type: ignore from xarray import Dataset, register_dataset_accessor from virtualizarr.manifests import ManifestArray @@ -91,6 +90,8 @@ def to_kerchunk( if format == "dict": return refs elif format == "json": + import ujson + if filepath is None: raise ValueError("Filepath must be provided when format is 'json'") diff --git a/virtualizarr/manifests/manifest.py b/virtualizarr/manifests/manifest.py index a6d160ed..88ac9a91 100644 --- a/virtualizarr/manifests/manifest.py +++ b/virtualizarr/manifests/manifest.py @@ -5,7 +5,6 @@ from typing import Any, Callable, Dict, NewType, Tuple, TypedDict, cast import numpy as np -from upath import UPath from virtualizarr.types import ChunkKey @@ -41,6 +40,8 @@ class ChunkEntry: def from_kerchunk( cls, path_and_byte_range_info: tuple[str] | tuple[str, int, int] ) -> "ChunkEntry": + from upath import UPath + if len(path_and_byte_range_info) == 1: path = path_and_byte_range_info[0] offset = 0 diff --git a/virtualizarr/readers/kerchunk.py b/virtualizarr/readers/kerchunk.py index c274ee5a..d3632b68 100644 --- a/virtualizarr/readers/kerchunk.py +++ b/virtualizarr/readers/kerchunk.py @@ -2,7 +2,6 @@ from pathlib import Path from typing import Any, MutableMapping, Optional, cast -import ujson # type: ignore from xarray import Dataset from xarray.core.indexes import Index from xarray.core.variable import Variable @@ -300,6 +299,8 @@ def fully_decode_arr_refs(d: dict) -> KerchunkArrRefs: """ Only have to do this because kerchunk.SingleHdf5ToZarr apparently doesn't bother converting .zarray and .zattrs contents to dicts, see https://github.com/fsspec/kerchunk/issues/415 . """ + import ujson + sanitized = d.copy() for k, v in d.items(): if k.startswith("."): diff --git a/virtualizarr/tests/__init__.py b/virtualizarr/tests/__init__.py index 7df13d10..a2080164 100644 --- a/virtualizarr/tests/__init__.py +++ b/virtualizarr/tests/__init__.py @@ -33,6 +33,7 @@ def _importorskip( has_astropy, requires_astropy = _importorskip("astropy") +has_kerchunk, requires_kerchunk = _importorskip("kerchunk") has_s3fs, requires_s3fs = _importorskip("s3fs") has_tifffile, requires_tifffile = _importorskip("tifffile") diff --git a/virtualizarr/tests/test_backend.py b/virtualizarr/tests/test_backend.py index 731c4acc..272d7d48 100644 --- a/virtualizarr/tests/test_backend.py +++ b/virtualizarr/tests/test_backend.py @@ -1,7 +1,6 @@ from collections.abc import Mapping from unittest.mock import patch -import fsspec import numpy as np import pytest import xarray as xr @@ -237,6 +236,8 @@ def test_read_from_url(self, filetype, url): assert isinstance(vds, xr.Dataset) def test_virtualizarr_vs_local_nisar(self): + import fsspec + # Open group directly from locally cached file with xarray url = "https://nisar.asf.earthdatacloud.nasa.gov/NISAR-SAMPLE-DATA/GCOV/ALOS1_Rosamond_20081012/NISAR_L2_PR_GCOV_001_005_A_219_4020_SHNA_A_20081012T060910_20081012T060926_P01101_F_N_J_001.h5" tmpfile = fsspec.open_local( diff --git a/virtualizarr/tests/test_kerchunk.py b/virtualizarr/tests/test_kerchunk.py index 2442ec8d..7b1eef4f 100644 --- a/virtualizarr/tests/test_kerchunk.py +++ b/virtualizarr/tests/test_kerchunk.py @@ -7,8 +7,10 @@ dataset_from_kerchunk_refs, find_var_names, ) +from virtualizarr.tests import requires_kerchunk +@requires_kerchunk def test_kerchunk_roundtrip_in_memory_no_concat(): # Set up example xarray dataset chunks_dict = { @@ -40,6 +42,7 @@ def test_kerchunk_roundtrip_in_memory_no_concat(): xrt.assert_equal(roundtrip, ds) +@requires_kerchunk def test_no_duplicates_find_var_names(): """Verify that we get a deduplicated list of var names""" ref_dict = {"refs": {"x/something": {}, "x/otherthing": {}}} diff --git a/virtualizarr/tests/test_readers/test_kerchunk.py b/virtualizarr/tests/test_readers/test_kerchunk.py index a8127509..589d7c71 100644 --- a/virtualizarr/tests/test_readers/test_kerchunk.py +++ b/virtualizarr/tests/test_readers/test_kerchunk.py @@ -1,10 +1,10 @@ import numpy as np -import ujson # type: ignore from virtualizarr.manifests import ManifestArray from virtualizarr.readers.kerchunk import ( dataset_from_kerchunk_refs, ) +from virtualizarr.tests import requires_kerchunk def gen_ds_refs( @@ -24,6 +24,7 @@ def gen_ds_refs( } +@requires_kerchunk def test_dataset_from_df_refs(): ds_refs = gen_ds_refs() ds = dataset_from_kerchunk_refs(ds_refs) @@ -45,7 +46,10 @@ def test_dataset_from_df_refs(): } +@requires_kerchunk def test_dataset_from_df_refs_with_filters(): + import ujson + filters = [{"elementsize": 4, "id": "shuffle"}, {"id": "zlib", "level": 4}] zarray = { "chunks": [2, 3], diff --git a/virtualizarr/tests/test_writers/test_kerchunk.py b/virtualizarr/tests/test_writers/test_kerchunk.py index ff53f1ae..efef2013 100644 --- a/virtualizarr/tests/test_writers/test_kerchunk.py +++ b/virtualizarr/tests/test_writers/test_kerchunk.py @@ -1,6 +1,5 @@ import numpy as np import pandas as pd -import ujson # type: ignore from xarray import Dataset from virtualizarr.manifests import ChunkManifest, ManifestArray @@ -40,6 +39,8 @@ def test_accessor_to_kerchunk_dict(self): assert result_ds_refs == expected_ds_refs def test_accessor_to_kerchunk_json(self, tmp_path): + import ujson + manifest = ChunkManifest( entries={"0.0": dict(path="test.nc", offset=6144, length=48)} ) @@ -77,6 +78,8 @@ def test_accessor_to_kerchunk_json(self, tmp_path): assert loaded_refs == expected_ds_refs def test_accessor_to_kerchunk_parquet(self, tmp_path): + import ujson + chunks_dict = { "0.0": {"path": "foo.nc", "offset": 100, "length": 100}, "0.1": {"path": "foo.nc", "offset": 200, "length": 100}, diff --git a/virtualizarr/writers/kerchunk.py b/virtualizarr/writers/kerchunk.py index 6b4b55f8..3a0bd27b 100644 --- a/virtualizarr/writers/kerchunk.py +++ b/virtualizarr/writers/kerchunk.py @@ -3,7 +3,6 @@ from typing import cast import numpy as np -import ujson # type: ignore from xarray import Dataset from xarray.coding.times import CFDatetimeCoder from xarray.core.variable import Variable @@ -30,6 +29,8 @@ def dataset_to_kerchunk_refs(ds: Dataset) -> KerchunkStoreRefs: Create a dictionary containing kerchunk-style store references from a single xarray.Dataset (which wraps ManifestArray objects). """ + import ujson + all_arr_refs = {} for var_name, var in ds.variables.items(): arr_refs = variable_to_kerchunk_arr_refs(var, str(var_name)) diff --git a/virtualizarr/zarr.py b/virtualizarr/zarr.py index f62b1269..f50e24b0 100644 --- a/virtualizarr/zarr.py +++ b/virtualizarr/zarr.py @@ -3,7 +3,6 @@ import numcodecs import numpy as np -import ujson # type: ignore if TYPE_CHECKING: pass @@ -100,6 +99,8 @@ def dict(self) -> dict[str, Any]: return zarray_dict def to_kerchunk_json(self) -> str: + import ujson + zarray_dict = self.dict() if zarray_dict["fill_value"] is np.nan: zarray_dict["fill_value"] = None From 91be89c6f5ea25eba57bd25bd4566edefbb5261f Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 17 Oct 2024 00:25:16 -0400 Subject: [PATCH 02/13] skip all tests which require kerchunk --- pyproject.toml | 2 +- virtualizarr/tests/__init__.py | 1 + virtualizarr/tests/test_backend.py | 16 +++++++++++++++- virtualizarr/tests/test_integration.py | 4 ++++ virtualizarr/tests/test_manifests/test_array.py | 4 +++- virtualizarr/tests/test_utils.py | 2 ++ virtualizarr/tests/test_writers/test_kerchunk.py | 2 ++ virtualizarr/tests/test_xarray.py | 3 +++ 8 files changed, 31 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 73da10ba..d4669a31 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ dependencies = [ "numpy>=2.0.0", "packaging", "universal-pathlib", - "numcodecs" + "numcodecs", ] [project.optional-dependencies] diff --git a/virtualizarr/tests/__init__.py b/virtualizarr/tests/__init__.py index a2080164..70f613ce 100644 --- a/virtualizarr/tests/__init__.py +++ b/virtualizarr/tests/__init__.py @@ -35,6 +35,7 @@ def _importorskip( has_astropy, requires_astropy = _importorskip("astropy") has_kerchunk, requires_kerchunk = _importorskip("kerchunk") has_s3fs, requires_s3fs = _importorskip("s3fs") +has_scipy, requires_scipy = _importorskip("scipy") has_tifffile, requires_tifffile = _importorskip("tifffile") diff --git a/virtualizarr/tests/test_backend.py b/virtualizarr/tests/test_backend.py index 272d7d48..81a23e0c 100644 --- a/virtualizarr/tests/test_backend.py +++ b/virtualizarr/tests/test_backend.py @@ -12,9 +12,17 @@ from virtualizarr.backend import FileType from virtualizarr.manifests import ManifestArray from virtualizarr.readers.kerchunk import _automatically_determine_filetype -from virtualizarr.tests import has_astropy, has_tifffile, network, requires_s3fs +from virtualizarr.tests import ( + has_astropy, + has_tifffile, + network, + requires_kerchunk, + requires_s3fs, + requires_scipy, +) +@requires_scipy def test_automatically_determine_filetype_netcdf3_netcdf4(): # test the NetCDF3 vs NetCDF4 automatic file type selection @@ -74,6 +82,7 @@ def test_FileType(): FileType(None) +@requires_kerchunk class TestOpenVirtualDatasetIndexes: def test_no_indexes(self, netcdf4_file): vds = open_virtual_dataset(netcdf4_file, indexes={}) @@ -104,6 +113,7 @@ def index_mappings_equal(indexes1: Mapping[str, Index], indexes2: Mapping[str, I return True +@requires_kerchunk def test_cftime_index(tmpdir): """Ensure a virtual dataset contains the same indexes as an Xarray dataset""" # Note: Test was created to debug: https://github.com/zarr-developers/VirtualiZarr/issues/168 @@ -129,6 +139,7 @@ def test_cftime_index(tmpdir): assert vds.attrs == ds.attrs +@requires_kerchunk class TestOpenVirtualDatasetAttrs: def test_drop_array_dimensions(self, netcdf4_file): # regression test for GH issue #150 @@ -267,6 +278,7 @@ def test_virtualizarr_vs_local_nisar(self): xrt.assert_equal(dsXR, dsV) +@requires_kerchunk class TestLoadVirtualDataset: def test_loadable_variables(self, netcdf4_file): vars_to_load = ["air", "time"] @@ -339,6 +351,7 @@ def test_open_dataset_with_scalar(self, hdf5_scalar, tmpdir): assert vds.scalar.attrs == {"scalar": "true"} +@requires_kerchunk @pytest.mark.parametrize( "reference_format", ["json", "parquet", "invalid"], @@ -396,6 +409,7 @@ def test_open_virtual_dataset_existing_kerchunk_refs( assert set(vds.variables) == set(netcdf4_virtual_dataset.variables) +@requires_kerchunk def test_notimplemented_read_inline_refs(tmp_path, netcdf4_inlined_ref): # For now, we raise a NotImplementedError if we read existing references that have inlined data # https://github.com/zarr-developers/VirtualiZarr/pull/251#pullrequestreview-2361916932 diff --git a/virtualizarr/tests/test_integration.py b/virtualizarr/tests/test_integration.py index 5894f643..18a24e9f 100644 --- a/virtualizarr/tests/test_integration.py +++ b/virtualizarr/tests/test_integration.py @@ -6,9 +6,11 @@ from virtualizarr import open_virtual_dataset from virtualizarr.manifests.array import ManifestArray from virtualizarr.manifests.manifest import ChunkManifest +from virtualizarr.tests import requires_kerchunk from virtualizarr.zarr import ZArray +@requires_kerchunk @pytest.mark.parametrize( "inline_threshold, vars_to_inline", [ @@ -45,6 +47,7 @@ def test_numpy_arrays_to_inlined_kerchunk_refs( assert refs["refs"]["time/0"] == expected["refs"]["time/0"] +@requires_kerchunk @pytest.mark.parametrize("format", ["dict", "json", "parquet"]) class TestKerchunkRoundtrip: def test_kerchunk_roundtrip_no_concat(self, tmpdir, format): @@ -212,6 +215,7 @@ def test_datetime64_dtype_fill_value(self, tmpdir, format): assert roundtrip.a.attrs == ds.a.attrs +@requires_kerchunk def test_open_scalar_variable(tmpdir): # regression test for GH issue #100 diff --git a/virtualizarr/tests/test_manifests/test_array.py b/virtualizarr/tests/test_manifests/test_array.py index 6d5ede79..9031195f 100644 --- a/virtualizarr/tests/test_manifests/test_array.py +++ b/virtualizarr/tests/test_manifests/test_array.py @@ -2,7 +2,7 @@ import pytest from virtualizarr.manifests import ChunkManifest, ManifestArray -from virtualizarr.tests import create_manifestarray +from virtualizarr.tests import create_manifestarray, requires_kerchunk from virtualizarr.zarr import ZArray @@ -35,6 +35,7 @@ def test_create_manifestarray(self): assert marr.size == 5 * 2 * 20 assert marr.ndim == 3 + @requires_kerchunk def test_create_manifestarray_from_kerchunk_refs(self): arr_refs = { ".zarray": '{"chunks":[2,3],"compressor":null,"dtype":" Date: Thu, 17 Oct 2024 00:45:58 -0400 Subject: [PATCH 03/13] add new CI job --- .github/workflows/min-deps.yml | 60 ++++++++++++++++++++++++++++++++++ ci/min-deps.yml | 24 ++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 .github/workflows/min-deps.yml create mode 100644 ci/min-deps.yml diff --git a/.github/workflows/min-deps.yml b/.github/workflows/min-deps.yml new file mode 100644 index 00000000..9ca7f92b --- /dev/null +++ b/.github/workflows/min-deps.yml @@ -0,0 +1,60 @@ +name: CI + +on: + push: + branches: [ "main" ] + paths-ignore: + - 'docs/**' + pull_request: + branches: [ "main" ] + paths-ignore: + - 'docs/**' + schedule: + - cron: "0 0 * * *" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + + test: + name: ${{ matrix.python-version }}-build + runs-on: ubuntu-latest + defaults: + run: + shell: bash -l {0} + strategy: + matrix: + python-version: ["3.12"] + steps: + - uses: actions/checkout@v4 + + - name: Setup micromamba + uses: mamba-org/setup-micromamba@v1 + with: + environment-file: ci/min-deps.yml + cache-environment: true + create-args: >- + python=${{matrix.python-version}} + + - name: Install virtualizarr + run: | + python -m pip install -e . --no-deps + - name: Conda list information + run: | + conda env list + conda list + + - name: Running Tests + run: | + python -m pytest ./virtualizarr --cov=./ --cov-report=xml --verbose + + - name: Upload code coverage to Codecov + uses: codecov/codecov-action@v3.1.4 + with: + file: ./coverage.xml + flags: unittests + env_vars: OS,PYTHON + name: codecov-umbrella + fail_ci_if_error: false diff --git a/ci/min-deps.yml b/ci/min-deps.yml new file mode 100644 index 00000000..5b1e97c1 --- /dev/null +++ b/ci/min-deps.yml @@ -0,0 +1,24 @@ +name: virtualizarr-min-deps +channels: + - conda-forge + - nodefaults +dependencies: + - h5netcdf + - h5py + - hdf5 + - netcdf4 + - xarray>=2024.6.0 + - numpy>=2.0.0 + - packaging + - universal_pathlib + # Testing + - codecov + - pre-commit + - mypy + - ruff + - pandas-stubs + - pytest-mypy + - pytest-cov + - pytest + - pooch + - fsspec From 0b9b697140d1168148f021d7be78658879bd4a39 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 17 Oct 2024 01:14:39 -0400 Subject: [PATCH 04/13] rename git workflow --- .github/workflows/min-deps.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/min-deps.yml b/.github/workflows/min-deps.yml index 9ca7f92b..066e1ba3 100644 --- a/.github/workflows/min-deps.yml +++ b/.github/workflows/min-deps.yml @@ -1,4 +1,4 @@ -name: CI +name: min-deps on: push: From 97d55cb2001a3c70d60db24e0feb646578f1346d Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 17 Oct 2024 01:17:52 -0400 Subject: [PATCH 05/13] move numcodecs import inside --- virtualizarr/zarr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virtualizarr/zarr.py b/virtualizarr/zarr.py index f50e24b0..cd83a67d 100644 --- a/virtualizarr/zarr.py +++ b/virtualizarr/zarr.py @@ -1,7 +1,6 @@ import dataclasses from typing import TYPE_CHECKING, Any, Literal, NewType, cast -import numcodecs import numpy as np if TYPE_CHECKING: @@ -154,6 +153,8 @@ def _v3_codec_pipeline(self) -> list: post_compressor: Iterable[BytesBytesCodec] #optional ``` """ + import numcodecs + if self.filters: filter_codecs_configs = [ numcodecs.get_codec(filter).get_config() for filter in self.filters From 379830a81f8328e64984fe31d5cd6c34d0ed8515 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 17 Oct 2024 01:20:39 -0400 Subject: [PATCH 06/13] add numcodecs to CI env --- ci/min-deps.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/min-deps.yml b/ci/min-deps.yml index 5b1e97c1..1f8bd521 100644 --- a/ci/min-deps.yml +++ b/ci/min-deps.yml @@ -9,6 +9,7 @@ dependencies: - netcdf4 - xarray>=2024.6.0 - numpy>=2.0.0 + - numcodecs - packaging - universal_pathlib # Testing From 9f622ed02bab7dde268526de9ce663e89f6dcbac Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 17 Oct 2024 14:28:31 -0400 Subject: [PATCH 07/13] make ujson required --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d4669a31..d216b269 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ dependencies = [ "packaging", "universal-pathlib", "numcodecs", + "ujson", ] [project.optional-dependencies] @@ -47,7 +48,6 @@ test = [ "ruff", "s3fs", "scipy", - "ujson", ] From aa13eb741fe8f842675da664a32e7f0e804912db Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 17 Oct 2024 14:29:37 -0400 Subject: [PATCH 08/13] unskip tests for parsing in-memory kerchunk dicts --- virtualizarr/tests/test_readers/test_kerchunk.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/virtualizarr/tests/test_readers/test_kerchunk.py b/virtualizarr/tests/test_readers/test_kerchunk.py index 589d7c71..9e734d9c 100644 --- a/virtualizarr/tests/test_readers/test_kerchunk.py +++ b/virtualizarr/tests/test_readers/test_kerchunk.py @@ -24,7 +24,6 @@ def gen_ds_refs( } -@requires_kerchunk def test_dataset_from_df_refs(): ds_refs = gen_ds_refs() ds = dataset_from_kerchunk_refs(ds_refs) @@ -46,7 +45,6 @@ def test_dataset_from_df_refs(): } -@requires_kerchunk def test_dataset_from_df_refs_with_filters(): import ujson From e4e3d7eff8eac762424d7a9bb8b568cc5ab8f763 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 17 Oct 2024 18:36:28 +0000 Subject: [PATCH 09/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- virtualizarr/tests/test_readers/test_kerchunk.py | 1 - 1 file changed, 1 deletion(-) diff --git a/virtualizarr/tests/test_readers/test_kerchunk.py b/virtualizarr/tests/test_readers/test_kerchunk.py index 9e734d9c..0faa1ff2 100644 --- a/virtualizarr/tests/test_readers/test_kerchunk.py +++ b/virtualizarr/tests/test_readers/test_kerchunk.py @@ -4,7 +4,6 @@ from virtualizarr.readers.kerchunk import ( dataset_from_kerchunk_refs, ) -from virtualizarr.tests import requires_kerchunk def gen_ds_refs( From b702f2550f9beff4c66fa578a58f586dd58b4117 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 17 Oct 2024 14:52:36 -0400 Subject: [PATCH 10/13] add ujson to CI environment --- ci/min-deps.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/min-deps.yml b/ci/min-deps.yml index 1f8bd521..7ca8c0b3 100644 --- a/ci/min-deps.yml +++ b/ci/min-deps.yml @@ -11,6 +11,7 @@ dependencies: - numpy>=2.0.0 - numcodecs - packaging + - ujson - universal_pathlib # Testing - codecov From 32aaac2dde5923fad79d0d410103672fa98e3ee0 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 17 Oct 2024 15:04:04 -0400 Subject: [PATCH 11/13] in-memory roundtrip doesn't require kerchunk --- virtualizarr/tests/test_kerchunk.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/virtualizarr/tests/test_kerchunk.py b/virtualizarr/tests/test_kerchunk.py index 7b1eef4f..2442ec8d 100644 --- a/virtualizarr/tests/test_kerchunk.py +++ b/virtualizarr/tests/test_kerchunk.py @@ -7,10 +7,8 @@ dataset_from_kerchunk_refs, find_var_names, ) -from virtualizarr.tests import requires_kerchunk -@requires_kerchunk def test_kerchunk_roundtrip_in_memory_no_concat(): # Set up example xarray dataset chunks_dict = { @@ -42,7 +40,6 @@ def test_kerchunk_roundtrip_in_memory_no_concat(): xrt.assert_equal(roundtrip, ds) -@requires_kerchunk def test_no_duplicates_find_var_names(): """Verify that we get a deduplicated list of var names""" ref_dict = {"refs": {"x/something": {}, "x/otherthing": {}}} From f59963fddc6235125a6d12040f6321d4fb3f4e09 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Thu, 17 Oct 2024 15:07:03 -0400 Subject: [PATCH 12/13] move in-memory kerchunk roundtrip test to test_integration.py --- virtualizarr/tests/test_integration.py | 44 ++++++++++++++++++++++-- virtualizarr/tests/test_kerchunk.py | 46 -------------------------- 2 files changed, 42 insertions(+), 48 deletions(-) diff --git a/virtualizarr/tests/test_integration.py b/virtualizarr/tests/test_integration.py index 18a24e9f..434d12d7 100644 --- a/virtualizarr/tests/test_integration.py +++ b/virtualizarr/tests/test_integration.py @@ -4,12 +4,52 @@ import xarray.testing as xrt from virtualizarr import open_virtual_dataset -from virtualizarr.manifests.array import ManifestArray -from virtualizarr.manifests.manifest import ChunkManifest +from virtualizarr.manifests import ChunkManifest, ManifestArray +from virtualizarr.readers.kerchunk import ( + dataset_from_kerchunk_refs, + find_var_names, +) from virtualizarr.tests import requires_kerchunk from virtualizarr.zarr import ZArray +def test_kerchunk_roundtrip_in_memory_no_concat(): + # Set up example xarray dataset + chunks_dict = { + "0.0": {"path": "foo.nc", "offset": 100, "length": 100}, + "0.1": {"path": "foo.nc", "offset": 200, "length": 100}, + } + manifest = ChunkManifest(entries=chunks_dict) + marr = ManifestArray( + zarray=dict( + shape=(2, 4), + dtype=np.dtype(" Date: Thu, 17 Oct 2024 15:07:23 -0400 Subject: [PATCH 13/13] remove now-empty test_kerchunk.py file --- virtualizarr/tests/test_kerchunk.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 virtualizarr/tests/test_kerchunk.py diff --git a/virtualizarr/tests/test_kerchunk.py b/virtualizarr/tests/test_kerchunk.py deleted file mode 100644 index e69de29b..00000000