Skip to content

Commit

Permalink
zarr-python v3 compatibility (#516)
Browse files Browse the repository at this point in the history

Co-authored-by: Nathan Zimmerman <npzimmerman@gmail.com>
Co-authored-by: Martin Durant <martin.durant@alumni.utoronto.ca>
  • Loading branch information
3 people authored Jan 30, 2025
1 parent 5940fe2 commit d2b00a9
Show file tree
Hide file tree
Showing 35 changed files with 1,082 additions and 700 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [310, 311, 312]
python-version: [311, 312]

steps:
- uses: actions/checkout@v4
Expand All @@ -19,8 +19,9 @@ jobs:
- name: Install kerchunk
shell: bash -l {0}
run: |
pip install -e .
pip install -e . --no-deps
pip list
- name: Test with pytest
shell: bash -l {0}
run: |
pytest -v --cov
pytest -v --timeout 60 --cov
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
rev: v4.6.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 22.3.0
rev: 25.1.0
hooks:
- id: black
exclude: ^docs/
- repo: https://github.com/pycqa/flake8
rev: '4.0.1'
rev: '7.1.1'
hooks:
- id: flake8
exclude: tests/|^docs/|__init__.py
4 changes: 2 additions & 2 deletions ci/environment-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ channels:
- conda-forge
- defaults
dependencies:
- python=3.10
- python=3.12
- dask
- zarr
- xarray>=2024.10.0
Expand All @@ -17,7 +17,7 @@ dependencies:
- aiohttp
- pytest-cov
- pytest-subtests
- fsspec<=2024.12.0
- fsspec
- dask
- scipy
- s3fs
Expand Down
38 changes: 0 additions & 38 deletions ci/environment-py310.yml

This file was deleted.

9 changes: 5 additions & 4 deletions ci/environment-py311.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ dependencies:
- cfgrib
# Temporary workaround for #508
- eccodes <2.38

- ujson
- cftime
- astropy
- requests
- aiohttp
- pytest-cov
- pytest-subtests
- fsspec<=2024.12.0
- pytest-timeout
- dask
- scipy
- s3fs
Expand All @@ -34,5 +34,6 @@ dependencies:
- tifffile
- rioxarray
- netCDF4
# - pip:
# - git+https://github.com/fsspec/filesystem_spec
- pip:
- git+https://github.com/fsspec/filesystem_spec
- git+https://github.com/zarr-developers/zarr-python
9 changes: 5 additions & 4 deletions ci/environment-py312.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ dependencies:
- cfgrib
# Temporary workaround for #508
- eccodes <2.38

- ujson
- cftime
- astropy
- requests
- aiohttp
- pytest-cov
- pytest-subtests
- fsspec<=2024.12.0
- pytest-timeout
- gcsfs
- dask
- scipy
Expand All @@ -34,5 +34,6 @@ dependencies:
- tifffile
- rioxarray
- netCDF4
# - pip:
# - git+https://github.com/fsspec/filesystem_spec
- pip:
- git+https://github.com/fsspec/filesystem_spec
- git+https://github.com/zarr-developers/zarr-python
2 changes: 1 addition & 1 deletion kerchunk/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from . import codecs
from kerchunk import codecs

from importlib.metadata import version as _version

Expand Down
3 changes: 2 additions & 1 deletion kerchunk/_grib_idx.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,8 @@ def _extract_single_group(grib_group: dict, idx: int, storage_options: Dict):
return None

dt = xr.open_datatree(
fsspec.filesystem("reference", fo=grib_tree_store).get_mapper(""),
"reference://",
storage_options={"fo": grib_tree_store},
engine="zarr",
consolidated=False,
)
Expand Down
81 changes: 79 additions & 2 deletions kerchunk/codecs.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
import ast
from dataclasses import dataclass
import io

import numcodecs
from numcodecs.abc import Codec
import numpy as np
import threading
import zlib
from zarr.core.array_spec import ArraySpec
from zarr.abc.codec import ArrayBytesCodec
from zarr.core.buffer import Buffer, NDBuffer
from zarr.core.common import JSON, parse_named_configuration
from zarr.registry import register_codec


class FillStringsCodec(Codec):
Expand Down Expand Up @@ -115,6 +121,78 @@ def decode(self, buf, out=None):
numcodecs.register_codec(GRIBCodec, "grib")


@dataclass(frozen=True)
class GRIBZarrCodec(ArrayBytesCodec):
    """Zarr v3 array-bytes codec that decodes a single GRIB message via eccodes.

    This is a decode-only codec: ``_encode_single`` and
    ``compute_encoded_size`` raise ``NotImplementedError``.

    Parameters
    ----------
    var:
        Name of the variable to extract. ``"latitude"`` and ``"longitude"``
        read the eccodes ``latitudes``/``longitudes`` arrays; anything else
        reads ``values``.
    dtype:
        Output dtype of the decoded array. A falsy value selects the default
        (``float64`` for coordinates, ``float32`` for values).
    """

    # Class-level re-entrant lock, shared by all instances; held around every
    # eccodes call made in ``_decode_single``.
    eclock = threading.RLock()

    var: str
    dtype: np.dtype

    def __init__(self, *, var: str, dtype: np.dtype) -> None:
        # The dataclass is frozen, so plain attribute assignment is blocked;
        # go through object.__setattr__ instead.
        object.__setattr__(self, "var", var)
        object.__setattr__(self, "dtype", dtype)

    @classmethod
    def from_dict(cls, data: dict[str, JSON]) -> "GRIBZarrCodec":
        """Build the codec from its named-configuration dict.

        The expected name is ``"grib"``, matching both ``to_dict`` and the
        name under which this codec is registered.
        """
        _, configuration_parsed = parse_named_configuration(
            data, "grib", require_configuration=True
        )
        configuration_parsed = configuration_parsed or {}
        return cls(**configuration_parsed)  # type: ignore[arg-type]

    def to_dict(self) -> dict[str, JSON]:
        """Return the named-configuration dict for this codec.

        The configuration is always included so that the result can be fed
        back to ``from_dict``, which requires one. The dtype is emitted as a
        string so that the configuration holds only JSON-compatible values.
        """
        dtype = None if self.dtype is None else str(np.dtype(self.dtype))
        return {
            "name": "grib",
            "configuration": {"var": self.var, "dtype": dtype},
        }

    async def _decode_single(
        self,
        chunk_bytes: Buffer,
        chunk_spec: ArraySpec,
    ) -> NDBuffer:
        """Decode one GRIB message held in ``chunk_bytes``.

        For ``values``, entries equal to the message's ``missingValue`` are
        replaced with NaN before the array is cast to the output dtype.
        """
        assert isinstance(chunk_bytes, Buffer)
        import eccodes

        # NOTE(review): ``or`` falls back on any falsy dtype, not only None -
        # confirm that is the intended behaviour.
        if self.var in ["latitude", "longitude"]:
            var = self.var + "s"
            dt = self.dtype or "float64"
        else:
            var = "values"
            dt = self.dtype or "float32"

        with self.eclock:
            mid = eccodes.codes_new_from_message(chunk_bytes.to_bytes())
            try:
                data = eccodes.codes_get_array(mid, var)
                missingValue = eccodes.codes_get_string(mid, "missingValue")
                if var == "values" and missingValue:
                    data[data == float(missingValue)] = np.nan
                # NOTE(review): this returns the eccodes array directly rather
                # than wrapping it in an NDBuffer as the annotation suggests -
                # confirm the zarr pipeline accepts that.
                return data.astype(dt, copy=False)

            finally:
                # Always release the eccodes handle, even if decoding failed.
                eccodes.codes_release(mid)

    async def _encode_single(
        self,
        chunk_array: NDBuffer,
        chunk_spec: ArraySpec,
    ) -> Buffer | None:
        """Encoding is unsupported; this is a one-way codec."""
        raise NotImplementedError

    def compute_encoded_size(
        self, input_byte_length: int, _chunk_spec: ArraySpec
    ) -> int:
        """Not implemented: the codec never encodes."""
        raise NotImplementedError


register_codec("grib", GRIBZarrCodec)


class AsciiTableCodec(numcodecs.abc.Codec):
"""Decodes ASCII-TABLE extensions in FITS files"""

Expand Down Expand Up @@ -166,7 +244,6 @@ def decode(self, buf, out=None):
arr2 = np.empty((self.nrow,), dtype=dt_out)
heap = buf[arr.nbytes :]
for name in dt_out.names:

if dt_out[name] == "O":
dt = np.dtype(self.ftypes[self.types[name]])
counts = arr[name][:, 0]
Expand Down Expand Up @@ -245,7 +322,7 @@ class ZlibCodec(Codec):
codec_id = "zlib"

def __init__(self):
...
pass

def decode(self, data, out=None):
if out:
Expand Down
Loading

0 comments on commit d2b00a9

Please sign in to comment.