diff --git a/.git_archival.txt b/.git_archival.txt index 95cb3eea4e3..8fb235d7045 100644 --- a/.git_archival.txt +++ b/.git_archival.txt @@ -1 +1,4 @@ +node: $Format:%H$ +node-date: $Format:%cI$ +describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$ ref-names: $Format:%D$ diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 935729c055f..bdae56ae6db 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -119,7 +119,7 @@ jobs: python xarray/util/print_versions.py - name: Install mypy run: | - python -m pip install mypy + python -m pip install 'mypy<0.990' - name: Run mypy run: | @@ -134,6 +134,64 @@ jobs: name: codecov-umbrella fail_ci_if_error: false + mypy38: + name: Mypy 3.8 + runs-on: "ubuntu-latest" + needs: detect-ci-trigger + # temporarily skipping due to https://github.com/pydata/xarray/issues/6551 + if: needs.detect-ci-trigger.outputs.triggered == 'false' + defaults: + run: + shell: bash -l {0} + env: + CONDA_ENV_FILE: ci/requirements/environment.yml + PYTHON_VERSION: "3.8" + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 # Fetch all history for all branches and tags. + + - name: set environment variables + run: | + echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV + - name: Setup micromamba + uses: mamba-org/provision-with-micromamba@v14 + with: + environment-file: ${{env.CONDA_ENV_FILE}} + environment-name: xarray-tests + extra-specs: | + python=${{env.PYTHON_VERSION}} + conda + cache-env: true + cache-env-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" + - name: Install xarray + run: | + python -m pip install --no-deps -e . + - name: Version info + run: | + conda info -a + conda list + python xarray/util/print_versions.py + - name: Install mypy + run: | + python -m pip install 'mypy<0.990' + + - name: Run mypy + run: | + python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report + + - name: Upload mypy coverage to Codecov + uses: codecov/codecov-action@v3.1.1 + with: + file: mypy_report/cobertura.xml + flags: mypy38 + env_vars: PYTHON_VERSION + name: codecov-umbrella + fail_ci_if_error: false + + + min-version-policy: name: Minimum Version Policy runs-on: "ubuntu-latest" diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index 8c74b6988c0..68bd0c15067 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -80,6 +80,7 @@ jobs: if: success() id: status run: | + export ZARR_V3_EXPERIMENTAL_API=1 python -m pytest --timeout=60 -rf \ --report-log output-${{ matrix.python-version }}-log.jsonl - name: Generate and publish the report @@ -88,6 +89,6 @@ jobs: && steps.status.outcome == 'failure' && github.event_name == 'schedule' && github.repository_owner == 'pydata' - uses: xarray-contrib/issue-from-pytest-log@v1.1 + uses: xarray-contrib/issue-from-pytest-log@v1 with: log-path: output-${{ matrix.python-version }}-log.jsonl diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index da02b230730..67dd54faf3a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ # https://pre-commit.com/ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v4.4.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -10,7 +10,7 @@ repos: - id: mixed-line-ending # This wants to go before isort & flake8 - repo: https://github.com/PyCQA/autoflake - rev: 
"v1.7.7" + rev: "v2.0.0" hooks: - id: autoflake # isort should run before black as black sometimes tweaks the isort output args: ["--in-place", "--ignore-init-module-imports"] @@ -19,7 +19,7 @@ repos: hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: v3.2.0 + rev: v3.2.2 hooks: - id: pyupgrade args: @@ -31,14 +31,14 @@ repos: - id: black - id: black-jupyter - repo: https://github.com/keewis/blackdoc - rev: v0.3.7 + rev: v0.3.8 hooks: - id: blackdoc exclude: "generate_aggregations.py" additional_dependencies: ["black==22.10.0"] - id: blackdoc-autoupdate-black - repo: https://github.com/PyCQA/flake8 - rev: 5.0.4 + rev: 6.0.0 hooks: - id: flake8 # - repo: https://github.com/Carreau/velin @@ -47,7 +47,7 @@ repos: # - id: velin # args: ["--write", "--compact"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.982 + rev: v0.991 hooks: - id: mypy # Copied from setup.cfg diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 5de6d6a4f76..b43e4d1e4e8 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -58,8 +58,7 @@ // "pip+emcee": [""], // emcee is only available for install with pip. // }, "matrix": { - "setuptools_scm[toml]": [""], // GH6609 - "setuptools_scm_git_archive": [""], // GH6609 + "setuptools_scm": [""], // GH6609 "numpy": [""], "pandas": [""], "netcdf4": [""], diff --git a/asv_bench/benchmarks/renaming.py b/asv_bench/benchmarks/renaming.py new file mode 100644 index 00000000000..3ade5d8df70 --- /dev/null +++ b/asv_bench/benchmarks/renaming.py @@ -0,0 +1,27 @@ +import numpy as np + +import xarray as xr + + +class SwapDims: + param_names = ["size"] + params = [[int(1e3), int(1e5), int(1e7)]] + + def setup(self, size: int) -> None: + self.ds = xr.Dataset( + {"a": (("x", "t"), np.ones((size, 2)))}, + coords={ + "x": np.arange(size), + "y": np.arange(size), + "z": np.arange(size), + "x2": ("x", np.arange(size)), + "y2": ("y", np.arange(size)), + "z2": ("z", np.arange(size)), + }, + ) + + def time_swap_dims(self, size: int) -> None: + self.ds.swap_dims({"x": "xn", "y": "yn", "z": "zn"}) + + def time_swap_dims_newindex(self, size: int) -> None: + self.ds.swap_dims({"x": "x2", "y": "y2", "z": "z2"}) diff --git a/ci/min_deps_check.py b/ci/min_deps_check.py index 3cc10c7ef32..23621c55560 100755 --- a/ci/min_deps_check.py +++ b/ci/min_deps_check.py @@ -30,12 +30,12 @@ POLICY_MONTHS = {"python": 24, "numpy": 18} POLICY_MONTHS_DEFAULT = 12 POLICY_OVERRIDE: Dict[str, Tuple[int, int]] = {} -has_errors = False +errors = [] def error(msg: str) -> None: - global has_errors - has_errors = True + global errors + errors.append(msg) print("ERROR:", msg) @@ -48,7 +48,7 @@ def parse_requirements(fname) -> Iterator[Tuple[str, int, int, Optional[int]]]: Yield (package name, major version, minor version, [patch version]) """ - global has_errors + global errors with open(fname) as fh: contents = yaml.safe_load(fh) @@ -157,9 +157,9 @@ def process_pkg( status = "> (!)" delta = relativedelta(datetime.now(), policy_published_actual).normalized() n_months = delta.years * 12 + delta.months - error( - f"Package is too new: {pkg}={req_major}.{req_minor} was " - f"published on {versions[req_major, req_minor]:%Y-%m-%d} " + warning( + f"Package is too new: {pkg}={policy_major}.{policy_minor} was " + f"published on {versions[policy_major, policy_minor]:%Y-%m-%d} " f"which was {n_months} months ago (policy is {policy_months} months)" ) else: @@ -193,13 +193,18 @@ def main() -> None: for pkg, major, minor, patch in parse_requirements(fname) ] - print("Package 
Required Policy Status") + print("\nPackage Required Policy Status") print("----------------- -------------------- -------------------- ------") fmt = "{:17} {:7} ({:10}) {:7} ({:10}) {}" for row in rows: print(fmt.format(*row)) - assert not has_errors + if errors: + print("\nErrors:") + print("-------") + for i, e in enumerate(errors): + print(f"{i+1}. {e}") + sys.exit(1) if __name__ == "__main__": diff --git a/ci/requirements/all-but-dask.yml b/ci/requirements/all-but-dask.yml index a673e4a14c7..d30c94348d0 100644 --- a/ci/requirements/all-but-dask.yml +++ b/ci/requirements/all-but-dask.yml @@ -30,7 +30,6 @@ dependencies: - pip - pseudonetcdf - pydap - # - pynio # not compatible with netCDF4>1.5.3, see #4491 - pytest - pytest-cov - pytest-env diff --git a/ci/requirements/bare-minimum.yml b/ci/requirements/bare-minimum.yml index 7f35ccae987..fe1af1af027 100644 --- a/ci/requirements/bare-minimum.yml +++ b/ci/requirements/bare-minimum.yml @@ -11,5 +11,5 @@ dependencies: - pytest-env - pytest-xdist - numpy=1.20 - - packaging=21.0 + - packaging=21.3 - pandas=1.3 diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 8401e31a8fc..22bfa3543d3 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -30,7 +30,6 @@ dependencies: - pre-commit - pseudonetcdf - pydap - # - pynio # Not available on Windows - pytest - pytest-cov - pytest-env diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 2d71233a610..d6bc8466c76 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -34,7 +34,6 @@ dependencies: - pre-commit - pseudonetcdf - pydap - # - pynio # not compatible with netCDF4>1.5.3, see #4491 - pytest - pytest-cov - pytest-env diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml index 1a2307aee5e..c7e157992f0 100644 --- a/ci/requirements/min-all-deps.yml +++ b/ci/requirements/min-all-deps.yml @@ -8,40 +8,38 @@ dependencies: # When upgrading python, numpy, or pandas, must also change # doc/user-guide/installing.rst, doc/user-guide/plotting.rst and setup.py. - python=3.8 - - boto3=1.18 + - boto3=1.20 - bottleneck=1.3 - - cartopy=0.19 + - cartopy=0.20 - cdms2=3.1 - cfgrib=0.9 - cftime=1.5 - coveralls - - dask-core=2021.09 - - distributed=2021.09 + - dask-core=2021.11 + - distributed=2021.11 - flox=0.5 - h5netcdf=0.11 - # h5py and hdf5 tend to cause conflicrs + # h5py and hdf5 tend to cause conflicts # for e.g. 
hdf5 1.12 conflicts with h5py=3.1 # prioritize bumping other packages instead - - h5py=3.1 - - hdf5=1.10 + - h5py=3.6 + - hdf5=1.12 - hypothesis - iris=3.1 - lxml=4.6 # Optional dep of pydap - - matplotlib-base=3.4 - - nc-time-axis=1.3 + - matplotlib-base=3.5 + - nc-time-axis=1.4 # netcdf follows a 1.major.minor[.patch] convention # (see https://github.com/Unidata/netcdf4-python/issues/1090) - # bumping the netCDF4 version is currently blocked by #4491 - - netcdf4=1.5.3 + - netcdf4=1.5.7 - numba=0.54 - numpy=1.20 - - packaging=21.0 + - packaging=21.3 - pandas=1.3 - - pint=0.17 + - pint=0.18 - pip - - pseudonetcdf=3.1 + - pseudonetcdf=3.2 - pydap=3.2 - # - pynio=1.5.5 - pytest - pytest-cov - pytest-env @@ -51,7 +49,7 @@ dependencies: - seaborn=0.11 - sparse=0.13 - toolz=0.11 - - typing_extensions=3.10 + - typing_extensions=4.0 - zarr=2.10 - pip: - numbagg==0.1 diff --git a/doc/getting-started-guide/installing.rst b/doc/getting-started-guide/installing.rst index d357843cdda..7124b0f87a5 100644 --- a/doc/getting-started-guide/installing.rst +++ b/doc/getting-started-guide/installing.rst @@ -8,7 +8,7 @@ Required dependencies - Python (3.8 or later) - `numpy `__ (1.20 or later) -- `packaging `__ (21.0 or later) +- `packaging `__ (21.3 or later) - `pandas `__ (1.3 or later) .. _optional-dependencies: diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index beab5fc050b..f2d3c6cc9d9 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -162,11 +162,77 @@ To do so, pass a ``group`` keyword argument to the :py:func:`open_dataset` function. The group can be specified as a path-like string, e.g., to access subgroup ``'bar'`` within group ``'foo'`` pass ``'/foo/bar'`` as the ``group`` argument. + In a similar way, the ``group`` keyword argument can be given to the :py:meth:`Dataset.to_netcdf` method to write to a group in a netCDF file. When writing multiple groups in one file, pass ``mode='a'`` to :py:meth:`Dataset.to_netcdf` to ensure that each call does not delete the file. +For example: + +.. ipython:: + :verbatim: + + In [1]: ds1 = xr.Dataset({"a": 0}) + + In [2]: ds2 = xr.Dataset({"b": 1}) + + In [3]: ds1.to_netcdf("file.nc", group="A") + + In [4]: ds2.to_netcdf("file.nc", group="B", mode="a") + +We can verify that two groups have been saved using the ncdump command-line utility. + +.. code:: bash + + $ ncdump file.nc + netcdf file { + + group: A { + variables: + int64 a ; + data: + + a = 0 ; + } // group A + + group: B { + variables: + int64 b ; + data: + + b = 1 ; + } // group B + } + +Either of these groups can be loaded from the file as an independent :py:class:`Dataset` object: + +.. ipython:: + :verbatim: + + In [1]: group1 = xr.open_dataset("file.nc", group="A") + + In [2]: group1 + Out[2]: + + Dimensions: () + Data variables: + a int64 ... + + In [3]: group2 = xr.open_dataset("file.nc", group="B") + + In [4]: group2 + Out[4]: + + Dimensions: () + Data variables: + b int64 ... + +.. note:: + + For native handling of multiple groups with xarray, including I/O, you might be interested in the experimental + `xarray-datatree `_ package. + .. _io.encoding: @@ -1209,6 +1275,10 @@ We recommend installing cfgrib via conda:: Formats supported by PyNIO -------------------------- +.. warning:: + + The PyNIO backend is deprecated_. PyNIO is no longer maintained_. See + Xarray can also read GRIB, HDF4 and other file formats supported by PyNIO_, if PyNIO is installed. To use PyNIO to read such files, supply ``engine='pynio'`` to :py:func:`open_dataset`. 
@@ -1217,12 +1287,9 @@ We recommend installing PyNIO via conda:: conda install -c conda-forge pynio -.. warning:: - - PyNIO is no longer actively maintained and conflicts with netcdf4 > 1.5.3. - The PyNIO backend may be moved outside of xarray in the future. - .. _PyNIO: https://www.pyngl.ucar.edu/Nio.shtml +.. _deprecated: https://github.com/pydata/xarray/issues/4491 +.. _maintained: https://github.com/NCAR/pynio/issues/53 .. _io.PseudoNetCDF: diff --git a/doc/user-guide/reshaping.rst b/doc/user-guide/reshaping.rst index 3a4e7ea88c6..95bf21a71b0 100644 --- a/doc/user-guide/reshaping.rst +++ b/doc/user-guide/reshaping.rst @@ -20,7 +20,7 @@ Reordering dimensions To reorder dimensions on a :py:class:`~xarray.DataArray` or across all variables on a :py:class:`~xarray.Dataset`, use :py:meth:`~xarray.DataArray.transpose`. An -ellipsis (`...`) can be use to represent all other dimensions: +ellipsis (`...`) can be used to represent all other dimensions: .. ipython:: python diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6465c1e88ae..00dbe80485b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,12 +14,79 @@ What's New np.random.seed(123456) +.. _whats-new.2022.11.1: -.. _whats-new.2022.10.1: - -v2022.10.1 (unreleased) +v2022.11.1 (unreleased) ----------------------- +New Features +~~~~~~~~~~~~ +- Enable using `offset` and `origin` arguments in :py:meth:`DataArray.resample` + and :py:meth:`Dataset.resample` (:issue:`7266`, :pull:`7284`). By `Spencer + Clark `_. +- Add experimental support for Zarr's in-progress V3 specification. (:pull:`6475`). + By `Gregory Lee `_ and `Joe Hamman `_. + +Breaking changes +~~~~~~~~~~~~~~~~ + +- The minimum versions of some dependencies were changed (:pull:`7300`): + + ========================== ========= ======== + Package Old New + ========================== ========= ======== + boto 1.18 1.20 + cartopy 0.19 0.20 + distributed 2021.09 2021.11 + dask 2021.09 2021.11 + h5py 3.1 3.6 + hdf5 1.10 1.12 + matplotlib-base 3.4 3.5 + nc-time-axis 1.3 1.4 + netcdf4 1.5.3 1.5.7 + packaging 20.3 21.3 + pint 0.17 0.18 + pseudonetcdf 3.1 3.2 + typing_extensions 3.10 4.0 + ========================== ========= ======== + +Deprecations +~~~~~~~~~~~~ +- The PyNIO backend has been deprecated (:issue:`4491`, :pull:`7301`). + By `Joe Hamman `_. + +Bug fixes +~~~~~~~~~ +- Fix handling of coordinate attributes in :py:func:`where`. (:issue:`7220`, :pull:`7229`) + By `Sam Levang `_. +- Import ``nc_time_axis`` when needed (:issue:`7275`, :pull:`7276`). + By `Michael Niklas `_. +- Fix static typing of :py:meth:`xr.polyval` (:issue:`7312`, :pull:`7315`). + By `Michael Niklas `_. +- Fix multiple reads on fsspec S3 files by resetting file pointer to 0 when reading file streams (:issue:`6813`, :pull:`7304`). + By `David Hoese `_ and `Wei Ji Leong `_. + +Documentation +~~~~~~~~~~~~~ + +- Add example of reading and writing individual groups to a single netCDF file to I/O docs page. (:pull:`7338`) + By `Tom Nicholas `_. + +Internal Changes +~~~~~~~~~~~~~~~~ + + +.. _whats-new.2022.11.0: + +v2022.11.0 (Nov 4, 2022) +------------------------ + +This release brings a number of bugfixes and documentation improvements. Both text and HTML +reprs now have a new "Indexes" section, which we expect will help with development of new +Index objects. This release also features more support for the Python Array API. 
+ +Many thanks to the 16 contributors to this release: Daniel Goman, Deepak Cherian, Illviljan, Jessica Scheick, Justus Magin, Mark Harfouche, Maximilian Roos, Mick, Patrick Naylor, Pierre, Spencer Clark, Stephan Hoyer, Tom Nicholas, Tom White + New Features ~~~~~~~~~~~~ @@ -2534,7 +2601,7 @@ Breaking changes have removed the internal use of the ``OrderedDict`` in favor of Python's builtin ``dict`` object which is now ordered itself. This change will be most obvious when interacting with the ``attrs`` property on Dataset and DataArray objects. - (:issue:`3380`, :pull:`3389`). HBy `Joeamman `_. + (:issue:`3380`, :pull:`3389`). By `Joe Hamman `_. New functions/methods ~~~~~~~~~~~~~~~~~~~~~ diff --git a/pyproject.toml b/pyproject.toml index 271abc0aab1..fec91210aea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,8 +2,7 @@ build-backend = "setuptools.build_meta" requires = [ "setuptools>=42", - "setuptools-scm[toml]>=3.4", - "setuptools-scm-git-archive", + "setuptools-scm>=7", ] [tool.setuptools_scm] @@ -27,6 +26,7 @@ exclude_lines = ["pragma: no cover", "if TYPE_CHECKING"] exclude = 'xarray/util/generate_.*\.py' files = "xarray" show_error_codes = true +warn_unused_ignores = true # Most of the numerical computing stack doesn't have type annotations yet. [[tool.mypy.overrides]] @@ -40,8 +40,6 @@ module = [ "cfgrib.*", "cftime.*", "cupy.*", - "dask.*", - "distributed.*", "fsspec.*", "h5netcdf.*", "h5py.*", @@ -52,11 +50,9 @@ module = [ "Nio.*", "nc_time_axis.*", "numbagg.*", - "numpy.*", "netCDF4.*", "netcdftime.*", "pandas.*", - "pint.*", "pooch.*", "PseudoNetCDF.*", "pydap.*", @@ -70,8 +66,6 @@ module = [ "zarr.*", ] -# version spanning code is hard to type annotate (and most of this module will -# be going away soon anyways) [[tool.mypy.overrides]] ignore_errors = true -module = "xarray.core.pycompat" +module = [] diff --git a/requirements.txt b/requirements.txt index e7015650c8b..4eb5c4a6b67 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,5 +3,5 @@ # https://help.github.com/en/github/visualizing-repository-data-with-graphs/listing-the-packages-that-a-repository-depends-on numpy >= 1.20 -packaging >= 21.0 +packaging >= 21.3 pandas >= 1.3 diff --git a/setup.cfg b/setup.cfg index 72dca2dec63..7919908e8ec 100644 --- a/setup.cfg +++ b/setup.cfg @@ -77,7 +77,7 @@ python_requires = >=3.8 install_requires = numpy >= 1.20 # recommended to use >= 1.22 for full quantile method support pandas >= 1.3 - packaging >= 21.0 + packaging >= 21.3 [options.extras_require] io = @@ -147,11 +147,12 @@ markers = [flake8] ignore = - E203 # whitespace before ':' - doesn't work well with black - E402 # module level import not at top of file - E501 # line too long - let black worry about that - E731 # do not assign a lambda expression, use a def - W503 # line break before binary operator + # E203: whitespace before ':' - doesn't work well with black + # E402: module level import not at top of file + # E501: line too long - let black worry about that + # E731: do not assign a lambda expression, use a def + # W503: line break before binary operator + E203, E402, E501, E731, W503 exclude = .eggs doc diff --git a/xarray/__init__.py b/xarray/__init__.py index 46dcf0e9b32..e920e94ca19 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -40,7 +40,7 @@ from importlib.metadata import version as _version except ImportError: # if the fallback library is missing, we are doomed.
- from importlib_metadata import version as _version # type: ignore[no-redef] + from importlib_metadata import version as _version try: __version__ = _version("xarray") diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 13bcf046ac3..02cf425386b 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -9,6 +9,7 @@ TYPE_CHECKING, Any, Callable, + Dict, Final, Hashable, Iterable, @@ -62,7 +63,7 @@ str, # no nice typing support for custom backends None, ] - T_Chunks = Union[int, dict[Any, Any], Literal["auto"], None] + T_Chunks = Union[int, Dict[Any, Any], Literal["auto"], None] T_NetcdfTypes = Literal[ "NETCDF4", "NETCDF4_CLASSIC", "NETCDF3_64BIT", "NETCDF3_CLASSIC" ] @@ -916,7 +917,7 @@ def open_mfdataset( >>> lon_bnds, lat_bnds = (-110, -105), (40, 45) >>> partial_func = partial(_preprocess, lon_bnds=lon_bnds, lat_bnds=lat_bnds) >>> ds = xr.open_mfdataset( - ... "file_*.nc", concat_dim="time", preprocess=_preprocess + ... "file_*.nc", concat_dim="time", preprocess=partial_func ... ) # doctest: +SKIP References @@ -1503,6 +1504,7 @@ def to_zarr( region: Mapping[str, slice] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, + zarr_version: int | None = None, ) -> backends.ZarrStore: ... @@ -1524,6 +1526,7 @@ def to_zarr( region: Mapping[str, slice] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, + zarr_version: int | None = None, ) -> Delayed: ... @@ -1542,6 +1545,7 @@ def to_zarr( region: Mapping[str, slice] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, + zarr_version: int | None = None, ) -> backends.ZarrStore | Delayed: """This function creates an appropriate datastore for writing a dataset to a zarr ztore @@ -1608,6 +1612,13 @@ def to_zarr( f"``region`` with to_zarr(), got {append_dim} in both" ) + if zarr_version is None: + # default to 2 if store doesn't specify it's version (e.g. 
a path) + zarr_version = int(getattr(store, "_store_version", 2)) + + if consolidated is None and zarr_version > 2: + consolidated = False + if mode == "r+": already_consolidated = consolidated consolidate_on_close = False @@ -1626,6 +1637,7 @@ def to_zarr( write_region=region, safe_chunks=safe_chunks, stacklevel=4, # for Dataset.to_zarr() + zarr_version=zarr_version, ) if mode in ["a", "r+"]: diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 10033587bf1..6a6f54704ac 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -4,7 +4,6 @@ import io import os -import numpy as np from packaging.version import Version from ..core import indexing @@ -46,9 +45,6 @@ def __getitem__(self, key): ) def _getitem(self, key): - # h5py requires using lists for fancy indexing: - # https://github.com/h5py/h5py/issues/992 - key = tuple(list(k) if isinstance(k, np.ndarray) else k for k in key) with self.datastore.lock: array = self.get_array(needs_lock=False) return array[key] diff --git a/xarray/backends/locks.py b/xarray/backends/locks.py index 73144df2b41..1de8fc8f7b8 100644 --- a/xarray/backends/locks.py +++ b/xarray/backends/locks.py @@ -61,7 +61,7 @@ def _get_lock_maker(scheduler=None): try: from dask.distributed import Lock as DistributedLock except ImportError: - DistributedLock = None # type: ignore + DistributedLock = None return DistributedLock else: raise KeyError(scheduler) diff --git a/xarray/backends/lru_cache.py b/xarray/backends/lru_cache.py index 0f7f4c23b2e..c60ef3fc6b3 100644 --- a/xarray/backends/lru_cache.py +++ b/xarray/backends/lru_cache.py @@ -30,7 +30,7 @@ class LRUCache(MutableMapping[K, V]): __slots__ = ("_cache", "_lock", "_maxsize", "_on_evict") - def __init__(self, maxsize: int, on_evict: Callable[[K, V], Any] = None): + def __init__(self, maxsize: int, on_evict: Callable[[K, V], Any] | None = None): """ Parameters ---------- diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 7b37b1a573a..f8b3192a4b9 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -232,11 +232,11 @@ def _extract_nc4_variable_encoding( "shuffle", "_FillValue", "dtype", + "compression", } if lsd_okay: valid_encodings.add("least_significant_digit") if h5py_okay: - valid_encodings.add("compression") valid_encodings.add("compression_opts") if not raise_on_invalid and encoding.get("chunksizes") is not None: diff --git a/xarray/backends/pynio_.py b/xarray/backends/pynio_.py index 07fb2b94673..cb7c65c0432 100644 --- a/xarray/backends/pynio_.py +++ b/xarray/backends/pynio_.py @@ -1,5 +1,7 @@ from __future__ import annotations +import warnings + import numpy as np from ..core import indexing @@ -55,6 +57,12 @@ class NioDataStore(AbstractDataStore): def __init__(self, filename, mode="r", lock=None, **kwargs): import Nio + warnings.warn( + "The PyNIO backend is Deprecated and will be removed from Xarray in a future release. " + "See https://github.com/pydata/xarray/issues/4491 for more information", + DeprecationWarning, + ) + if lock is None: lock = PYNIO_LOCK self.lock = ensure_lock(lock) @@ -94,6 +102,15 @@ def close(self): class PynioBackendEntrypoint(BackendEntrypoint): + """ + PyNIO backend + + .. deprecated:: 0.20.0 + + Deprecated as PyNIO is no longer supported. 
See + https://github.com/pydata/xarray/issues/4491 for more information + """ + available = module_available("Nio") def open_dataset( diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 6c547f82252..cca2d89678f 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -353,6 +353,7 @@ def open_group( write_region=None, safe_chunks=True, stacklevel=2, + zarr_version=None, ): import zarr @@ -360,12 +361,29 @@ def open_group( if isinstance(store, os.PathLike): store = os.fspath(store) + if zarr_version is None: + # default to 2 if store doesn't specify it's version (e.g. a path) + zarr_version = getattr(store, "_store_version", 2) + open_kwargs = dict( mode=mode, synchronizer=synchronizer, path=group, ) open_kwargs["storage_options"] = storage_options + if zarr_version > 2: + open_kwargs["zarr_version"] = zarr_version + + if consolidated or consolidate_on_close: + raise ValueError( + "consolidated metadata has not been implemented for zarr " + f"version {zarr_version} yet. Set consolidated=False for " + f"zarr version {zarr_version}. See also " + "https://github.com/zarr-developers/zarr-specs/issues/136" + ) + + if consolidated is None: + consolidated = False if chunk_store: open_kwargs["chunk_store"] = chunk_store @@ -440,6 +458,11 @@ def open_store_variable(self, name, zarr_array): zarr_array, DIMENSION_KEY, try_nczarr ) attributes = dict(attributes) + + # TODO: this should not be needed once + # https://github.com/zarr-developers/zarr-python/issues/1269 is resolved. + attributes.pop("filters", None) + encoding = { "chunks": zarr_array.chunks, "preferred_chunks": dict(zip(dimensions, zarr_array.chunks)), @@ -668,6 +691,7 @@ def open_zarr( storage_options=None, decode_timedelta=None, use_cftime=None, + zarr_version=None, **kwargs, ): """Load and decode a dataset from a Zarr store. @@ -725,6 +749,9 @@ def open_zarr( capability. Only works for stores that have already been consolidated. By default (`consolidate=None`), attempts to read consolidated metadata, falling back to read non-consolidated metadata if that fails. + + When the experimental ``zarr_version=3``, ``consolidated`` must be + either be ``None`` or ``False``. chunk_store : MutableMapping, optional A separate Zarr store only for chunk data. storage_options : dict, optional @@ -745,6 +772,10 @@ def open_zarr( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. + zarr_version : int or None, optional + The desired zarr spec version to target (currently 2 or 3). The default + of None will attempt to determine the zarr version from ``store`` when + possible, otherwise defaulting to 2. 
Returns ------- @@ -782,6 +813,7 @@ def open_zarr( "chunk_store": chunk_store, "storage_options": storage_options, "stacklevel": 4, + "zarr_version": zarr_version, } ds = open_dataset( @@ -798,6 +830,7 @@ def open_zarr( backend_kwargs=backend_kwargs, decode_timedelta=decode_timedelta, use_cftime=use_cftime, + zarr_version=zarr_version, ) return ds @@ -842,6 +875,7 @@ def open_dataset( chunk_store=None, storage_options=None, stacklevel=3, + zarr_version=None, ): filename_or_obj = _normalize_path(filename_or_obj) @@ -855,6 +889,7 @@ def open_dataset( chunk_store=chunk_store, storage_options=storage_options, stacklevel=stacklevel + 1, + zarr_version=zarr_version, ) store_entrypoint = StoreBackendEntrypoint() diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index a029f39c7b8..04b2d773e2e 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -207,6 +207,10 @@ def __mul__(self, other): return new_self * other return type(self)(n=other * self.n) + def as_timedelta(self): + """All Tick subclasses must implement an as_timedelta method.""" + raise NotImplementedError + def _get_day_of_month(other, day_option): """Find the day in `other`'s month that satisfies a BaseCFTimeOffset's diff --git a/xarray/core/_aggregations.py b/xarray/core/_aggregations.py index b9793e40279..7671be82b36 100644 --- a/xarray/core/_aggregations.py +++ b/xarray/core/_aggregations.py @@ -44,9 +44,9 @@ def count( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -116,9 +116,9 @@ def all( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -188,9 +188,9 @@ def any( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -261,9 +261,9 @@ def max( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -348,9 +348,9 @@ def min( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -435,9 +435,9 @@ def mean( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -527,9 +527,9 @@ def prod( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -634,9 +634,9 @@ def sum( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -741,9 +741,9 @@ def std( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -845,9 +845,9 @@ def var( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -948,9 +948,9 @@ def median( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." 
or None, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1039,9 +1039,9 @@ def cumsum( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1132,9 +1132,9 @@ def cumprod( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1240,9 +1240,9 @@ def count( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -1306,9 +1306,9 @@ def all( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -1372,9 +1372,9 @@ def any( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -1439,9 +1439,9 @@ def max( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. 
skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1518,9 +1518,9 @@ def min( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1597,9 +1597,9 @@ def mean( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1681,9 +1681,9 @@ def prod( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1778,9 +1778,9 @@ def sum( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1875,9 +1875,9 @@ def std( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1969,9 +1969,9 @@ def var( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -2062,9 +2062,9 @@ def median( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2145,9 +2145,9 @@ def cumsum( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2234,9 +2234,9 @@ def cumprod( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2317,7 +2317,7 @@ class DatasetGroupByAggregations: def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -2328,14 +2328,14 @@ def reduce( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, **kwargs: Any, ) -> Dataset: raise NotImplementedError() def count( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -2424,7 +2424,7 @@ def count( def all( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -2513,7 +2513,7 @@ def all( def any( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -2602,7 +2602,7 @@ def any( def max( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -2709,7 +2709,7 @@ def max( def min( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -2816,7 +2816,7 @@ def min( def mean( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -2927,7 +2927,7 @@ def mean( def prod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -3057,7 +3057,7 @@ def prod( def sum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -3187,7 +3187,7 @@ def sum( def std( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -3314,7 +3314,7 @@ def std( def var( self, - 
dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -3441,7 +3441,7 @@ def var( def median( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -3537,7 +3537,7 @@ def median( def cumsum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -3631,7 +3631,7 @@ def cumsum( def cumprod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -3730,7 +3730,7 @@ class DatasetResampleAggregations: def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -3741,14 +3741,14 @@ def reduce( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, **kwargs: Any, ) -> Dataset: raise NotImplementedError() def count( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -3837,7 +3837,7 @@ def count( def all( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -3926,7 +3926,7 @@ def all( def any( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -4015,7 +4015,7 @@ def any( def max( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -4122,7 +4122,7 @@ def max( def min( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -4229,7 +4229,7 @@ def min( def mean( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -4340,7 +4340,7 @@ def mean( def prod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -4470,7 +4470,7 @@ def prod( def sum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -4600,7 +4600,7 @@ def sum( def std( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -4727,7 +4727,7 @@ def std( def var( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -4854,7 +4854,7 @@ def var( def median( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -4950,7 +4950,7 @@ def median( def cumsum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -5044,7 +5044,7 @@ def cumsum( def cumprod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -5143,7 +5143,7 @@ class DataArrayGroupByAggregations: def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -5154,14 +5154,14 @@ def reduce( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, **kwargs: Any, ) -> DataArray: raise NotImplementedError() def count( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -5243,7 +5243,7 @@ def count( def all( self, - dim: Dims | ellipsis = None, + 
dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -5325,7 +5325,7 @@ def all( def any( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -5407,7 +5407,7 @@ def any( def max( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -5505,7 +5505,7 @@ def max( def min( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -5603,7 +5603,7 @@ def min( def mean( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -5705,7 +5705,7 @@ def mean( def prod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -5824,7 +5824,7 @@ def prod( def sum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -5943,7 +5943,7 @@ def sum( def std( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -6059,7 +6059,7 @@ def std( def var( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -6175,7 +6175,7 @@ def var( def median( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -6263,7 +6263,7 @@ def median( def cumsum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -6353,7 +6353,7 @@ def cumsum( def cumprod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -6448,7 +6448,7 @@ class DataArrayResampleAggregations: def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -6459,14 +6459,14 @@ def reduce( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, **kwargs: Any, ) -> DataArray: raise NotImplementedError() def count( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -6548,7 +6548,7 @@ def count( def all( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -6630,7 +6630,7 @@ def all( def any( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -6712,7 +6712,7 @@ def any( def max( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -6810,7 +6810,7 @@ def max( def min( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -6908,7 +6908,7 @@ def min( def mean( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -7010,7 +7010,7 @@ def mean( def prod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -7129,7 +7129,7 @@ def prod( def sum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -7248,7 +7248,7 @@ def sum( def std( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -7364,7 
+7364,7 @@ def std( def var( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -7480,7 +7480,7 @@ def var( def median( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -7568,7 +7568,7 @@ def median( def cumsum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -7658,7 +7658,7 @@ def cumsum( def cumprod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, diff --git a/xarray/core/_typed_ops.pyi b/xarray/core/_typed_ops.pyi index 46af53b1097..98a17a47cd5 100644 --- a/xarray/core/_typed_ops.pyi +++ b/xarray/core/_typed_ops.pyi @@ -77,157 +77,157 @@ class DataArrayOpsMixin: @overload def __add__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __add__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __add__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __add__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __sub__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __sub__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __sub__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __sub__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __mul__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __mul__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __mul__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __mul__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __pow__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __pow__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __pow__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __pow__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __truediv__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __truediv__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __truediv__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __truediv__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __floordiv__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __floordiv__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __floordiv__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __floordiv__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __mod__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __mod__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __mod__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __mod__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __and__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __and__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __and__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __and__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __xor__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __xor__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __xor__(self, other: "DatasetGroupBy") -> "Dataset": ... 
@overload def __xor__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __or__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __or__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __or__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __or__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __lt__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __lt__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __lt__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __lt__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __le__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __le__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __le__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __le__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __gt__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __gt__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __gt__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __gt__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __ge__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __ge__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __ge__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __ge__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload # type: ignore[override] def __eq__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __eq__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __eq__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __eq__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload # type: ignore[override] def __ne__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __ne__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __ne__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __ne__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __radd__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __radd__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __radd__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __radd__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __rsub__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rsub__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __rsub__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __rsub__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __rmul__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rmul__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __rmul__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __rmul__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __rpow__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rpow__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __rpow__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __rpow__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __rtruediv__(self, other: T_Dataset) -> T_Dataset: ... 
@overload - def __rtruediv__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __rtruediv__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __rtruediv__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __rfloordiv__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rfloordiv__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __rfloordiv__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __rfloordiv__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __rmod__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rmod__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __rmod__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __rmod__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __rand__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rand__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __rand__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __rand__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __rxor__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rxor__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __rxor__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __rxor__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __ror__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __ror__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __ror__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __ror__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... def _inplace_binary_op(self, other, f): ... @@ -417,157 +417,157 @@ class DatasetGroupByOpsMixin: @overload def __add__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __add__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __add__(self, other: "DataArray") -> "Dataset": ... @overload def __add__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __sub__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __sub__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __sub__(self, other: "DataArray") -> "Dataset": ... @overload def __sub__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __mul__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __mul__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __mul__(self, other: "DataArray") -> "Dataset": ... @overload def __mul__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __pow__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __pow__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __pow__(self, other: "DataArray") -> "Dataset": ... @overload def __pow__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __truediv__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __truediv__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __truediv__(self, other: "DataArray") -> "Dataset": ... @overload def __truediv__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __floordiv__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __floordiv__(self, other: "DataArray") -> "Dataset": ... 
# type: ignore[misc] + def __floordiv__(self, other: "DataArray") -> "Dataset": ... @overload def __floordiv__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __mod__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __mod__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __mod__(self, other: "DataArray") -> "Dataset": ... @overload def __mod__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __and__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __and__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __and__(self, other: "DataArray") -> "Dataset": ... @overload def __and__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __xor__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __xor__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __xor__(self, other: "DataArray") -> "Dataset": ... @overload def __xor__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __or__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __or__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __or__(self, other: "DataArray") -> "Dataset": ... @overload def __or__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __lt__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __lt__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __lt__(self, other: "DataArray") -> "Dataset": ... @overload def __lt__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __le__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __le__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __le__(self, other: "DataArray") -> "Dataset": ... @overload def __le__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __gt__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __gt__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __gt__(self, other: "DataArray") -> "Dataset": ... @overload def __gt__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __ge__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __ge__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __ge__(self, other: "DataArray") -> "Dataset": ... @overload def __ge__(self, other: GroupByIncompatible) -> NoReturn: ... @overload # type: ignore[override] def __eq__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __eq__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __eq__(self, other: "DataArray") -> "Dataset": ... @overload def __eq__(self, other: GroupByIncompatible) -> NoReturn: ... @overload # type: ignore[override] def __ne__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __ne__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __ne__(self, other: "DataArray") -> "Dataset": ... @overload def __ne__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __radd__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __radd__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __radd__(self, other: "DataArray") -> "Dataset": ... @overload def __radd__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __rsub__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rsub__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __rsub__(self, other: "DataArray") -> "Dataset": ... 
@overload def __rsub__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __rmul__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rmul__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __rmul__(self, other: "DataArray") -> "Dataset": ... @overload def __rmul__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __rpow__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rpow__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __rpow__(self, other: "DataArray") -> "Dataset": ... @overload def __rpow__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __rtruediv__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rtruediv__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __rtruediv__(self, other: "DataArray") -> "Dataset": ... @overload def __rtruediv__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __rfloordiv__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rfloordiv__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __rfloordiv__(self, other: "DataArray") -> "Dataset": ... @overload def __rfloordiv__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __rmod__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rmod__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __rmod__(self, other: "DataArray") -> "Dataset": ... @overload def __rmod__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __rand__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rand__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __rand__(self, other: "DataArray") -> "Dataset": ... @overload def __rand__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __rxor__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rxor__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __rxor__(self, other: "DataArray") -> "Dataset": ... @overload def __rxor__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __ror__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __ror__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __ror__(self, other: "DataArray") -> "Dataset": ... @overload def __ror__(self, other: GroupByIncompatible) -> NoReturn: ... diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index e84c4ed2a8a..2c74d2bed1d 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -112,7 +112,7 @@ def _apply_str_ufunc( obj: Any, dtype: DTypeLike = None, output_core_dims: list | tuple = ((),), - output_sizes: Mapping[Any, int] = None, + output_sizes: Mapping[Any, int] | None = None, func_args: tuple = (), func_kwargs: Mapping = {}, ) -> Any: @@ -222,7 +222,7 @@ def _apply( func: Callable, dtype: DTypeLike = None, output_core_dims: list | tuple = ((),), - output_sizes: Mapping[Any, int] = None, + output_sizes: Mapping[Any, int] | None = None, func_args: tuple = (), func_kwargs: Mapping = {}, ) -> T_DataArray: @@ -850,7 +850,7 @@ def isupper(self) -> T_DataArray: return self._apply(func=lambda x: x.isupper(), dtype=bool) def count( - self, pat: str | bytes | Pattern | Any, flags: int = 0, case: bool = None + self, pat: str | bytes | Pattern | Any, flags: int = 0, case: bool | None = None ) -> T_DataArray: """ Count occurrences of pattern in each string of the array. 
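As an illustration of the `count` signature touched above (the `case` argument is now annotated as `bool | None`), here is a minimal usage sketch; the array contents and pattern are invented for the example and are not part of the patch:

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.array(["abab", "Abc", ""], dtype=np.str_), dims="x")

    # case is left as None here, which the string accessor treats as
    # case-sensitive matching for plain string patterns
    print(da.str.count("ab").values)  # [2 0 0]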
@@ -1097,7 +1097,7 @@ def zfill(self, width: int | Any) -> T_DataArray: def contains( self, pat: str | bytes | Pattern | Any, - case: bool = None, + case: bool | None = None, flags: int = 0, regex: bool = True, ) -> T_DataArray: @@ -1170,7 +1170,7 @@ def func(x, ipat): def match( self, pat: str | bytes | Pattern | Any, - case: bool = None, + case: bool | None = None, flags: int = 0, ) -> T_DataArray: """ @@ -1531,7 +1531,7 @@ def replace( pat: str | bytes | Pattern | Any, repl: str | bytes | Callable | Any, n: int | Any = -1, - case: bool = None, + case: bool | None = None, flags: int = 0, regex: bool = True, ) -> T_DataArray: @@ -1603,7 +1603,7 @@ def extract( self, pat: str | bytes | Pattern | Any, dim: Hashable, - case: bool = None, + case: bool | None = None, flags: int = 0, ) -> T_DataArray: r""" @@ -1748,7 +1748,7 @@ def extractall( pat: str | bytes | Pattern | Any, group_dim: Hashable, match_dim: Hashable, - case: bool = None, + case: bool | None = None, flags: int = 0, ) -> T_DataArray: r""" @@ -1921,7 +1921,7 @@ def _get_res(val, ipat, imaxcount=maxcount, dtype=self._obj.dtype): def findall( self, pat: str | bytes | Pattern | Any, - case: bool = None, + case: bool | None = None, flags: int = 0, ) -> T_DataArray: r""" diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index df6f541c703..ef30d9afe85 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -141,10 +141,10 @@ def __init__( self, objects: Iterable[DataAlignable], join: str = "inner", - indexes: Mapping[Any, Any] = None, + indexes: Mapping[Any, Any] | None = None, exclude_dims: Iterable = frozenset(), exclude_vars: Iterable[Hashable] = frozenset(), - method: str = None, + method: str | None = None, tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value: Any = dtypes.NA, @@ -474,7 +474,7 @@ def override_indexes(self) -> None: if obj_idx is not None: for name, var in self.aligned_index_vars[key].items(): new_indexes[name] = aligned_idx - new_variables[name] = var.copy() + new_variables[name] = var.copy(deep=self.copy) objects[i + 1] = obj._overwrite_indexes(new_indexes, new_variables) @@ -490,7 +490,7 @@ def _get_dim_pos_indexers( obj_idx = matching_indexes.get(key) if obj_idx is not None: if self.reindex[key]: - indexers = obj_idx.reindex_like(aligned_idx, **self.reindex_kwargs) # type: ignore[call-arg] + indexers = obj_idx.reindex_like(aligned_idx, **self.reindex_kwargs) dim_pos_indexers.update(indexers) return dim_pos_indexers @@ -514,7 +514,7 @@ def _get_indexes_and_vars( if obj_idx is not None: for name, var in index_vars.items(): new_indexes[name] = aligned_idx - new_variables[name] = var.copy() + new_variables[name] = var.copy(deep=self.copy) return new_indexes, new_variables @@ -853,7 +853,7 @@ def is_alignable(obj): def reindex( obj: DataAlignable, indexers: Mapping[Any, Any], - method: str = None, + method: str | None = None, tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value: Any = dtypes.NA, @@ -892,7 +892,7 @@ def reindex( def reindex_like( obj: DataAlignable, other: Dataset | DataArray, - method: str = None, + method: str | None = None, tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value: Any = dtypes.NA, diff --git a/xarray/core/combine.py b/xarray/core/combine.py index fe4178eca61..f474d3beb19 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -377,7 +377,7 @@ def _nested_combine( # Define type for arbitrarily-nested list of lists recursively # 
Currently mypy cannot handle this but other linters can (https://stackoverflow.com/a/53845083/3154101) -DATASET_HYPERCUBE = Union[Dataset, Iterable["DATASET_HYPERCUBE"]] # type: ignore +DATASET_HYPERCUBE = Union[Dataset, Iterable["DATASET_HYPERCUBE"]] # type: ignore[misc] def combine_nested( @@ -669,7 +669,7 @@ def combine_by_coords( fill_value: object = dtypes.NA, join: JoinOptions = "outer", combine_attrs: CombineAttrsOptions = "no_conflicts", - datasets: Iterable[Dataset] = None, + datasets: Iterable[Dataset] | None = None, ) -> Dataset | DataArray: """ diff --git a/xarray/core/common.py b/xarray/core/common.py index 13fd91d8e99..d1387d62e99 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -44,7 +44,13 @@ from .indexes import Index from .resample import Resample from .rolling_exp import RollingExp - from .types import DTypeLikeSave, ScalarOrArray, SideOptions, T_DataWithCoords + from .types import ( + DatetimeLike, + DTypeLikeSave, + ScalarOrArray, + SideOptions, + T_DataWithCoords, + ) from .variable import Variable DTypeMaybeMapping = Union[DTypeLikeSave, Mapping[Any, DTypeLikeSave]] @@ -770,7 +776,7 @@ def pipe( def rolling_exp( self: T_DataWithCoords, - window: Mapping[Any, int] = None, + window: Mapping[Any, int] | None = None, window_type: str = "span", **window_kwargs, ) -> RollingExp[T_DataWithCoords]: @@ -817,7 +823,9 @@ def _resample( skipna: bool | None, closed: SideOptions | None, label: SideOptions | None, - base: int, + base: int | None, + offset: pd.Timedelta | datetime.timedelta | str | None, + origin: str | DatetimeLike, keep_attrs: bool | None, loffset: datetime.timedelta | str | None, restore_coord_dims: bool | None, @@ -845,6 +853,18 @@ def _resample( For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for "24H" frequency, base could range from 0 through 23. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : pd.Timedelta, datetime.timedelta, or str, default is None + An offset timedelta added to the origin. loffset : timedelta or str, optional Offset used to adjust the resampled time labels. Some pandas date offset strings are supported. 
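The docstring added above describes the new `origin` and `offset` resampling keywords that supersede `base`. A minimal sketch of how they could be used once this patch is applied; the timestamps, frequency and offset are invented for illustration:

    import numpy as np
    import pandas as pd
    import xarray as xr

    times = pd.date_range("2000-01-01 02:17", periods=48, freq="30min")
    da = xr.DataArray(np.arange(48.0), coords={"time": times}, dims="time")

    # instead of base=..., anchor the 3-hourly bins at the Unix epoch and then
    # shift every bin edge by a fixed timedelta
    means = da.resample(time="3H", origin="epoch", offset=pd.Timedelta("15min")).mean()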
@@ -960,10 +980,24 @@ def _resample( if isinstance(self._indexes[dim_name].to_pandas_index(), CFTimeIndex): from .resample_cftime import CFTimeGrouper - grouper = CFTimeGrouper(freq, closed, label, base, loffset) + grouper = CFTimeGrouper( + freq=freq, + closed=closed, + label=label, + base=base, + loffset=loffset, + origin=origin, + offset=offset, + ) else: grouper = pd.Grouper( - freq=freq, closed=closed, label=label, base=base, loffset=loffset + freq=freq, + closed=closed, + label=label, + base=base, + offset=offset, + origin=origin, + loffset=loffset, ) group = DataArray( dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 6ec38453a4b..41d529b1093 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -846,7 +846,7 @@ def apply_array_ufunc(func, *args, dask="forbidden"): def apply_ufunc( func: Callable, *args: Any, - input_core_dims: Sequence[Sequence] = None, + input_core_dims: Sequence[Sequence] | None = None, output_core_dims: Sequence[Sequence] | None = ((),), exclude_dims: AbstractSet = frozenset(), vectorize: bool = False, @@ -1734,7 +1734,7 @@ def dot( dim_counts.update(arr.dims) dims = tuple(d for d, c in dim_counts.items() if c > 1) - dot_dims: set[Hashable] = set(dims) # type:ignore[arg-type] + dot_dims: set[Hashable] = set(dims) # dimensions to be parallelized broadcast_dims = common_dims - dot_dims @@ -1855,15 +1855,13 @@ def where(cond, x, y, keep_attrs=None): Dataset.where, DataArray.where : equivalent methods """ + from .dataset import Dataset + if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) - if keep_attrs is True: - # keep the attributes of x, the second parameter, by default to - # be consistent with the `where` method of `DataArray` and `Dataset` - keep_attrs = lambda attrs, context: getattr(x, "attrs", {}) # alignment for three arguments is complicated, so don't support it yet - return apply_ufunc( + result = apply_ufunc( duck_array_ops.where, cond, x, @@ -1874,24 +1872,53 @@ def where(cond, x, y, keep_attrs=None): keep_attrs=keep_attrs, ) + # keep the attributes of x, the second parameter, by default to + # be consistent with the `where` method of `DataArray` and `Dataset` + # rebuild the attrs from x at each level of the output, which could be + # Dataset, DataArray, or Variable, and also handle coords + if keep_attrs is True: + if isinstance(y, Dataset) and not isinstance(x, Dataset): + # handle special case where x gets promoted to Dataset + result.attrs = {} + if getattr(x, "name", None) in result.data_vars: + result[x.name].attrs = getattr(x, "attrs", {}) + else: + # otherwise, fill in global attrs and variable attrs (if they exist) + result.attrs = getattr(x, "attrs", {}) + for v in getattr(result, "data_vars", []): + result[v].attrs = getattr(getattr(x, v, None), "attrs", {}) + for c in getattr(result, "coords", []): + # always fill coord attrs of x + result[c].attrs = getattr(getattr(x, c, None), "attrs", {}) + + return result + @overload -def polyval(coord: DataArray, coeffs: DataArray, degree_dim: Hashable) -> DataArray: +def polyval( + coord: DataArray, coeffs: DataArray, degree_dim: Hashable = "degree" +) -> DataArray: ... @overload -def polyval(coord: DataArray, coeffs: Dataset, degree_dim: Hashable) -> Dataset: +def polyval( + coord: DataArray, coeffs: Dataset, degree_dim: Hashable = "degree" +) -> Dataset: ... 
@overload -def polyval(coord: Dataset, coeffs: DataArray, degree_dim: Hashable) -> Dataset: +def polyval( + coord: Dataset, coeffs: DataArray, degree_dim: Hashable = "degree" +) -> Dataset: ... @overload -def polyval(coord: Dataset, coeffs: Dataset, degree_dim: Hashable) -> Dataset: +def polyval( + coord: Dataset, coeffs: Dataset, degree_dim: Hashable = "degree" +) -> Dataset: ... @@ -2001,10 +2028,10 @@ def _calc_idxminmax( *, array, func: Callable, - dim: Hashable = None, - skipna: bool = None, + dim: Hashable | None = None, + skipna: bool | None = None, fill_value: Any = dtypes.NA, - keep_attrs: bool = None, + keep_attrs: bool | None = None, ): """Apply common operations for idxmin and idxmax.""" # This function doesn't make sense for scalars so don't try diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 47350b9403f..3a6b70f117a 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -2,7 +2,7 @@ import warnings from contextlib import contextmanager -from typing import TYPE_CHECKING, Any, Hashable, Iterator, Mapping, Sequence, cast +from typing import TYPE_CHECKING, Any, Hashable, Iterator, List, Mapping, Sequence import numpy as np import pandas as pd @@ -14,18 +14,27 @@ from .variable import Variable, calculate_dimensions if TYPE_CHECKING: + from .common import DataWithCoords from .dataarray import DataArray from .dataset import Dataset + from .types import T_DataArray # Used as the key corresponding to a DataArray's variable when converting # arbitrary DataArray objects to datasets _THIS_ARRAY = ReprObject("") +# TODO: Remove when min python version >= 3.9: +GenericAlias = type(List[int]) -class Coordinates(Mapping[Hashable, "DataArray"]): - __slots__ = () - def __getitem__(self, key: Hashable) -> DataArray: +class Coordinates(Mapping[Hashable, "T_DataArray"]): + _data: DataWithCoords + __slots__ = ("_data",) + + # TODO: Remove when min python version >= 3.9: + __class_getitem__ = classmethod(GenericAlias) + + def __getitem__(self, key: Hashable) -> T_DataArray: raise NotImplementedError() def __setitem__(self, key: Hashable, value: Any) -> None: @@ -45,11 +54,11 @@ def dtypes(self) -> Frozen[Hashable, np.dtype]: @property def indexes(self) -> Indexes[pd.Index]: - return self._data.indexes # type: ignore[attr-defined] + return self._data.indexes @property def xindexes(self) -> Indexes[Index]: - return self._data.xindexes # type: ignore[attr-defined] + return self._data.xindexes @property def variables(self): @@ -79,7 +88,7 @@ def __repr__(self) -> str: def to_dataset(self) -> Dataset: raise NotImplementedError() - def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index: + def to_index(self, ordered_dims: Sequence[Hashable] | None = None) -> pd.Index: """Convert all index coordinates into a :py:class:`pandas.Index`. Parameters @@ -107,11 +116,9 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index: raise ValueError("no valid index for a 0-dimensional object") elif len(ordered_dims) == 1: (dim,) = ordered_dims - return self._data.get_index(dim) # type: ignore[attr-defined] + return self._data.get_index(dim) else: - indexes = [ - self._data.get_index(k) for k in ordered_dims # type: ignore[attr-defined] - ] + indexes = [self._data.get_index(k) for k in ordered_dims] # compute the sizes of the repeat and tile for the cartesian product # (taken from pandas.core.reshape.util) @@ -238,6 +245,8 @@ class DatasetCoordinates(Coordinates): objects. 
""" + _data: Dataset + __slots__ = ("_data",) def __init__(self, dataset: Dataset): @@ -278,7 +287,7 @@ def variables(self) -> Mapping[Hashable, Variable]: def __getitem__(self, key: Hashable) -> DataArray: if key in self._data.data_vars: raise KeyError(key) - return cast("DataArray", self._data[key]) + return self._data[key] def to_dataset(self) -> Dataset: """Convert these coordinates into a new Dataset""" @@ -334,16 +343,18 @@ def _ipython_key_completions_(self): ] -class DataArrayCoordinates(Coordinates): +class DataArrayCoordinates(Coordinates["T_DataArray"]): """Dictionary like container for DataArray coordinates. Essentially a dict with keys given by the array's dimensions and the values given by corresponding DataArray objects. """ + _data: T_DataArray + __slots__ = ("_data",) - def __init__(self, dataarray: DataArray): + def __init__(self, dataarray: T_DataArray) -> None: self._data = dataarray @property @@ -366,7 +377,7 @@ def dtypes(self) -> Frozen[Hashable, np.dtype]: def _names(self) -> set[Hashable]: return set(self._data._coords) - def __getitem__(self, key: Hashable) -> DataArray: + def __getitem__(self, key: Hashable) -> T_DataArray: return self._data._getitem_coord(key) def _update_coords( @@ -452,7 +463,7 @@ def drop_coords( def assert_coordinate_consistent( - obj: DataArray | Dataset, coords: Mapping[Any, Variable] + obj: T_DataArray | Dataset, coords: Mapping[Any, Variable] ) -> None: """Make sure the dimension coordinate of obj is consistent with coords. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 15d1777b270..f939a2c8b6e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -78,6 +78,7 @@ from .rolling import DataArrayCoarsen, DataArrayRolling from .types import ( CoarsenBoundaryOptions, + DatetimeLike, DatetimeUnitOptions, Dims, ErrorOptions, @@ -377,10 +378,10 @@ def __init__( | Mapping[Any, Any] | None = None, dims: Hashable | Sequence[Hashable] | None = None, - name: Hashable = None, - attrs: Mapping = None, + name: Hashable | None = None, + attrs: Mapping | None = None, # internal parameters - indexes: dict[Hashable, Index] = None, + indexes: dict[Hashable, Index] | None = None, fastpath: bool = False, ) -> None: if fastpath: @@ -427,7 +428,7 @@ def __init__( # TODO(shoyer): document this argument, once it becomes part of the # public interface. 
- self._indexes = indexes # type: ignore[assignment] + self._indexes = indexes self._close = None @@ -452,7 +453,7 @@ def _construct_direct( def _replace( self: T_DataArray, - variable: Variable = None, + variable: Variable | None = None, coords=None, name: Hashable | None | Default = _default, indexes=None, @@ -495,9 +496,9 @@ def _replace_maybe_drop_dims( def _overwrite_indexes( self: T_DataArray, indexes: Mapping[Any, Index], - coords: Mapping[Any, Variable] = None, - drop_coords: list[Hashable] = None, - rename_dims: Mapping[Any, Any] = None, + coords: Mapping[Any, Variable] | None = None, + drop_coords: list[Hashable] | None = None, + rename_dims: Mapping[Any, Any] | None = None, ) -> T_DataArray: """Maybe replace indexes and their corresponding coordinates.""" if not indexes: @@ -1415,8 +1416,8 @@ def isel( def sel( self: T_DataArray, - indexers: Mapping[Any, Any] = None, - method: str = None, + indexers: Mapping[Any, Any] | None = None, + method: str | None = None, tolerance=None, drop: bool = False, **indexers_kwargs: Any, @@ -1953,7 +1954,7 @@ def reindex_like( def reindex( self: T_DataArray, - indexers: Mapping[Any, Any] = None, + indexers: Mapping[Any, Any] | None = None, method: ReindexMethodOptions = None, tolerance: float | Iterable[float] | None = None, copy: bool = True, @@ -2515,7 +2516,7 @@ def expand_dims( # https://github.com/python/mypy/issues/12846 is resolved def set_index( self, - indexes: Mapping[Any, Hashable | Sequence[Hashable]] = None, + indexes: Mapping[Any, Hashable | Sequence[Hashable]] | None = None, append: bool = False, **indexes_kwargs: Hashable | Sequence[Hashable], ) -> DataArray: @@ -3613,7 +3614,7 @@ def combine_first(self: T_DataArray, other: T_DataArray) -> T_DataArray: def reduce( self: T_DataArray, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -3993,8 +3994,8 @@ def to_dict(self, data: bool = True, encoding: bool = False) -> dict[str, Any]: """ d = self.variable.to_dict(data=data) d.update({"coords": {}, "name": self.name}) - for k in self.coords: - d["coords"][k] = self.coords[k].variable.to_dict(data=data) + for k, coord in self.coords.items(): + d["coords"][k] = coord.variable.to_dict(data=data) if encoding: d["encoding"] = dict(self.encoding) return d @@ -4600,7 +4601,7 @@ def imag(self: T_DataArray) -> T_DataArray: def dot( self: T_DataArray, other: T_DataArray, - dims: Dims | ellipsis = None, + dims: Dims = None, ) -> T_DataArray: """Perform dot product of two DataArrays along their shared dims. @@ -4724,7 +4725,7 @@ def quantile( method: QuantileMethods = "linear", keep_attrs: bool | None = None, skipna: bool | None = None, - interpolation: QuantileMethods = None, + interpolation: QuantileMethods | None = None, ) -> T_DataArray: """Compute the qth quantile of the data along the specified dimension. 
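To make the revised `keep_attrs` handling in `xarray.core.computation.where` (earlier in this diff) concrete, a small sketch of the intended behaviour; the attribute name is arbitrary:

    import xarray as xr

    x = xr.DataArray([1.0, 2.0], dims="p", attrs={"units": "m"})
    y = xr.DataArray([10.0, 20.0], dims="p")
    cond = xr.DataArray([True, False], dims="p")

    # with keep_attrs=True the attrs of x (the second argument) are rebuilt on
    # the result, rather than injected through a keep_attrs callable as before
    out = xr.where(cond, x, y, keep_attrs=True)
    assert out.attrs == {"units": "m"}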
@@ -5604,7 +5605,7 @@ def idxmax( # https://github.com/python/mypy/issues/12846 is resolved def argmin( self, - dim: Dims | ellipsis = None, + dim: Dims = None, axis: int | None = None, keep_attrs: bool | None = None, skipna: bool | None = None, @@ -5706,7 +5707,7 @@ def argmin( # https://github.com/python/mypy/issues/12846 is resolved def argmax( self, - dim: Dims | ellipsis = None, + dim: Dims = None, axis: int | None = None, keep_attrs: bool | None = None, skipna: bool | None = None, @@ -6531,7 +6532,9 @@ def resample( skipna: bool | None = None, closed: SideOptions | None = None, label: SideOptions | None = None, - base: int = 0, + base: int | None = None, + offset: pd.Timedelta | datetime.timedelta | str | None = None, + origin: str | DatetimeLike = "start_day", keep_attrs: bool | None = None, loffset: datetime.timedelta | str | None = None, restore_coord_dims: bool | None = None, @@ -6555,10 +6558,22 @@ def resample( Side of each interval to treat as closed. label : {"left", "right"}, optional Side of each interval to use for labeling. - base : int, default = 0 + base : int, optional For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for "24H" frequency, base could range from 0 through 23. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : pd.Timedelta, datetime.timedelta, or str, default is None + An offset timedelta added to the origin. loffset : timedelta or str, optional Offset used to adjust the resampled time labels. Some pandas date offset strings are supported. 
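The `origin`/`offset` keywords documented in the resample docstring above also apply to `CFTimeIndex`-backed data, handled by the `CFTimeGrouper` changes further down in this diff. A minimal sketch, assuming the optional `cftime` dependency is installed; calendar, frequency and values are invented:

    import numpy as np
    import xarray as xr

    times = xr.cftime_range("2000-01-01", periods=30, freq="12H", calendar="noleap")
    da = xr.DataArray(np.arange(30.0), coords={"time": times}, dims="time")

    # origin="end_day" anchors the bins at the ceiling midnight of the last
    # timestamp; for that origin, closed and label default to "right"
    totals = da.resample(time="2D", origin="end_day").sum()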
@@ -6640,6 +6655,8 @@ def resample( closed=closed, label=label, base=base, + offset=offset, + origin=origin, keep_attrs=keep_attrs, loffset=loffset, restore_coord_dims=restore_coord_dims, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index dbf5e46b2ad..4f376bdf811 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -107,6 +107,7 @@ CoarsenBoundaryOptions, CombineAttrsOptions, CompatOptions, + DatetimeLike, DatetimeUnitOptions, Dims, ErrorOptions, @@ -153,7 +154,7 @@ def _get_virtual_variable( - variables, key: Hashable, dim_sizes: Mapping = None + variables, key: Hashable, dim_sizes: Mapping | None = None ) -> tuple[Hashable, Hashable, Variable]: """Get a virtual variable (e.g., 'time.year') from a dict of xarray.Variable objects (if possible) @@ -831,7 +832,7 @@ def _dask_postcompute(self: T_Dataset, results: Iterable[Variable]) -> T_Dataset ) def _dask_postpersist( - self: T_Dataset, dsk: Mapping, *, rename: Mapping[str, str] = None + self: T_Dataset, dsk: Mapping, *, rename: Mapping[str, str] | None = None ) -> T_Dataset: from dask import is_dask_collection from dask.highlevelgraph import HighLevelGraph @@ -971,7 +972,7 @@ def _construct_direct( def _replace( self: T_Dataset, - variables: dict[Hashable, Variable] = None, + variables: dict[Hashable, Variable] | None = None, coord_names: set[Hashable] | None = None, dims: dict[Any, int] | None = None, attrs: dict[Hashable, Any] | None | Default = _default, @@ -1767,7 +1768,7 @@ def to_netcdf( format: T_NetcdfTypes | None = None, group: str | None = None, engine: T_NetcdfEngine | None = None, - encoding: Mapping[Hashable, Mapping[str, Any]] | None = None, + encoding: Mapping[Any, Mapping[str, Any]] | None = None, unlimited_dims: Iterable[Hashable] | None = None, compute: bool = True, invalid_netcdf: bool = False, @@ -1783,7 +1784,7 @@ def to_netcdf( format: T_NetcdfTypes | None = None, group: str | None = None, engine: T_NetcdfEngine | None = None, - encoding: Mapping[Hashable, Mapping[str, Any]] | None = None, + encoding: Mapping[Any, Mapping[str, Any]] | None = None, unlimited_dims: Iterable[Hashable] | None = None, compute: Literal[True] = True, invalid_netcdf: bool = False, @@ -1799,7 +1800,7 @@ def to_netcdf( format: T_NetcdfTypes | None = None, group: str | None = None, engine: T_NetcdfEngine | None = None, - encoding: Mapping[Hashable, Mapping[str, Any]] | None = None, + encoding: Mapping[Any, Mapping[str, Any]] | None = None, unlimited_dims: Iterable[Hashable] | None = None, *, compute: Literal[False], @@ -1814,7 +1815,7 @@ def to_netcdf( format: T_NetcdfTypes | None = None, group: str | None = None, engine: T_NetcdfEngine | None = None, - encoding: Mapping[Hashable, Mapping[str, Any]] | None = None, + encoding: Mapping[Any, Mapping[str, Any]] | None = None, unlimited_dims: Iterable[Hashable] | None = None, compute: bool = True, invalid_netcdf: bool = False, @@ -1930,6 +1931,7 @@ def to_zarr( region: Mapping[str, slice] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, + zarr_version: int | None = None, ) -> ZarrStore: ... @@ -1967,6 +1969,7 @@ def to_zarr( region: Mapping[str, slice] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, + zarr_version: int | None = None, ) -> ZarrStore | Delayed: """Write dataset contents to a zarr group. @@ -2017,6 +2020,9 @@ def to_zarr( metadata; if False, do not. 
The default (`consolidated=None`) means write consolidated metadata and attempt to read consolidated metadata for existing stores (falling back to non-consolidated). + + When the experimental ``zarr_version=3``, ``consolidated`` must be + either be ``None`` or ``False``. append_dim : hashable, optional If set, the dimension along which the data will be appended. All other dimensions on overridden variables must remain the same size. @@ -2048,6 +2054,10 @@ def to_zarr( storage_options : dict, optional Any additional parameters for the storage backend (ignored for local paths). + zarr_version : int or None, optional + The desired zarr spec version to target (currently 2 or 3). The + default of None will attempt to determine the zarr version from + ``store`` when possible, otherwise defaulting to 2. Returns ------- @@ -2092,6 +2102,7 @@ def to_zarr( append_dim=append_dim, region=region, safe_chunks=safe_chunks, + zarr_version=zarr_version, ) def __repr__(self) -> str: @@ -2484,8 +2495,8 @@ def _isel_fancy( def sel( self: T_Dataset, - indexers: Mapping[Any, Any] = None, - method: str = None, + indexers: Mapping[Any, Any] | None = None, + method: str | None = None, tolerance: int | float | Iterable[int | float] | None = None, drop: bool = False, **indexers_kwargs: Any, @@ -2749,7 +2760,9 @@ def thin( return self.isel(indexers_slices) def broadcast_like( - self: T_Dataset, other: Dataset | DataArray, exclude: Iterable[Hashable] = None + self: T_Dataset, + other: Dataset | DataArray, + exclude: Iterable[Hashable] | None = None, ) -> T_Dataset: """Broadcast this DataArray against another Dataset or DataArray. This is equivalent to xr.broadcast(other, self)[1] @@ -3117,8 +3130,8 @@ def reindex( def _reindex( self: T_Dataset, - indexers: Mapping[Any, Any] = None, - method: str = None, + indexers: Mapping[Any, Any] | None = None, + method: str | None = None, tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value: Any = xrdtypes.NA, @@ -3144,7 +3157,7 @@ def interp( coords: Mapping[Any, Any] | None = None, method: InterpOptions = "linear", assume_sorted: bool = False, - kwargs: Mapping[str, Any] = None, + kwargs: Mapping[str, Any] | None = None, method_non_numeric: str = "nearest", **coords_kwargs: Any, ) -> T_Dataset: @@ -3694,7 +3707,9 @@ def rename_dims( return self._replace(variables, coord_names, dims=sizes, indexes=indexes) def rename_vars( - self: T_Dataset, name_dict: Mapping[Any, Hashable] = None, **names: Hashable + self: T_Dataset, + name_dict: Mapping[Any, Hashable] | None = None, + **names: Hashable, ) -> T_Dataset: """Returns a new object with renamed variables including coordinates @@ -3732,7 +3747,7 @@ def rename_vars( return self._replace(variables, coord_names, dims=dims, indexes=indexes) def swap_dims( - self: T_Dataset, dims_dict: Mapping[Any, Hashable] = None, **dims_kwargs + self: T_Dataset, dims_dict: Mapping[Any, Hashable] | None = None, **dims_kwargs ) -> T_Dataset: """Returns a new object with swapped dimensions. 
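A sketch of the new `zarr_version` argument documented in the `to_zarr` hunks above, assuming a zarr installation that exposes the experimental v3 API; the store path is hypothetical:

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"a": ("x", np.arange(4))})

    # target the experimental zarr v3 spec explicitly; per the docstring,
    # consolidated must stay None or False for v3 stores
    ds.to_zarr("example_v3.zarr", mode="w", zarr_version=3, consolidated=False)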
@@ -5578,7 +5593,7 @@ def interpolate_na( self: T_Dataset, dim: Hashable | None = None, method: InterpOptions = "linear", - limit: int = None, + limit: int | None = None, use_coordinate: bool | Hashable = True, max_gap: ( int | float | str | pd.Timedelta | np.timedelta64 | datetime.timedelta @@ -5783,7 +5798,7 @@ def combine_first(self: T_Dataset, other: T_Dataset) -> T_Dataset: def reduce( self: T_Dataset, func: Callable, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, keepdims: bool = False, @@ -6978,9 +6993,9 @@ def quantile( dim: Dims = None, method: QuantileMethods = "linear", numeric_only: bool = False, - keep_attrs: bool = None, - skipna: bool = None, - interpolation: QuantileMethods = None, + keep_attrs: bool | None = None, + skipna: bool | None = None, + interpolation: QuantileMethods | None = None, ) -> T_Dataset: """Compute the qth quantile of the data along the specified dimension. @@ -7905,7 +7920,7 @@ def polyfit( def pad( self: T_Dataset, - pad_width: Mapping[Any, int | tuple[int, int]] = None, + pad_width: Mapping[Any, int | tuple[int, int]] | None = None, mode: PadModeOptions = "constant", stat_length: int | tuple[int, int] @@ -9114,7 +9129,9 @@ def resample( skipna: bool | None = None, closed: SideOptions | None = None, label: SideOptions | None = None, - base: int = 0, + base: int | None = None, + offset: pd.Timedelta | datetime.timedelta | str | None = None, + origin: str | DatetimeLike = "start_day", keep_attrs: bool | None = None, loffset: datetime.timedelta | str | None = None, restore_coord_dims: bool | None = None, @@ -9138,10 +9155,22 @@ def resample( Side of each interval to treat as closed. label : {"left", "right"}, optional Side of each interval to use for labeling. - base : int, default = 0 + base : int, optional For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for "24H" frequency, base could range from 0 through 23. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : pd.Timedelta, datetime.timedelta, or str, default is None + An offset timedelta added to the origin. loffset : timedelta or str, optional Offset used to adjust the resampled time labels. Some pandas date offset strings are supported. @@ -9176,6 +9205,8 @@ def resample( closed=closed, label=label, base=base, + offset=offset, + origin=origin, keep_attrs=keep_attrs, loffset=loffset, restore_coord_dims=restore_coord_dims, diff --git a/xarray/core/extensions.py b/xarray/core/extensions.py index 84d184dcaca..d40151e48fb 100644 --- a/xarray/core/extensions.py +++ b/xarray/core/extensions.py @@ -105,6 +105,7 @@ def register_dataset_accessor(name): ... def plot(self): ... # plot this array's data on a map, e.g., using Cartopy ... pass + ... 
Back in an interactive IPython session: diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index b014cf43e83..351a4b27687 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -285,7 +285,11 @@ def inline_variable_array_repr(var, max_width): def summarize_variable( - name: Hashable, var, col_width: int, max_width: int = None, is_index: bool = False + name: Hashable, + var, + col_width: int, + max_width: int | None = None, + is_index: bool = False, ): """Summarize a variable in one line, e.g., for the Dataset.__repr__.""" variable = getattr(var, "variable", var) @@ -419,7 +423,9 @@ def inline_index_repr(index, max_width=None): return repr_ -def summarize_index(name: Hashable, index, col_width: int, max_width: int = None): +def summarize_index( + name: Hashable, index, col_width: int, max_width: int | None = None +): if max_width is None: max_width = OPTIONS["display_width"] diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 8373de6567d..37d2b5e4013 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -551,7 +551,7 @@ def map( def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -699,7 +699,7 @@ def _maybe_unstack(self, obj): def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, keep_attrs: bool | None = None, **kwargs: Any, ): @@ -757,7 +757,7 @@ def _flox_reduce( elif dim is ...: parsed_dim = tuple(self._original_obj.dims) else: - parsed_dim = tuple(dim) # type:ignore[arg-type] + parsed_dim = tuple(dim) # Do this so we raise the same error message whether flox is present or not. # Better to control it here than in flox. @@ -1183,7 +1183,7 @@ def _combine(self, applied, shortcut=False): def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -1336,7 +1336,7 @@ def _combine(self, applied): def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index a18322fe06b..a768155ba7d 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -52,7 +52,7 @@ def concat( cls: type[T_Index], indexes: Sequence[T_Index], dim: Hashable, - positions: Iterable[Iterable[int]] = None, + positions: Iterable[Iterable[int]] | None = None, ) -> T_Index: raise NotImplementedError() @@ -117,10 +117,12 @@ def __copy__(self) -> Index: def __deepcopy__(self, memo: dict[int, Any] | None = None) -> Index: return self._copy(deep=True, memo=memo) - def copy(self, deep: bool = True) -> Index: + def copy(self: T_Index, deep: bool = True) -> T_Index: return self._copy(deep=deep) - def _copy(self, deep: bool = True, memo: dict[int, Any] | None = None) -> Index: + def _copy( + self: T_Index, deep: bool = True, memo: dict[int, Any] | None = None + ) -> T_Index: cls = self.__class__ copied = cls.__new__(cls) if deep: @@ -269,6 +271,9 @@ def get_indexer_nd(index, labels, method=None, tolerance=None): return indexer +T_PandasIndex = TypeVar("T_PandasIndex", bound="PandasIndex") + + class PandasIndex(Index): """Wrap a pandas.Index as an xarray compatible index.""" @@ -368,7 +373,7 @@ def concat( cls, indexes: Sequence[PandasIndex], dim: Hashable, - positions: Iterable[Iterable[int]] = None, + positions: Iterable[Iterable[int]] | None = None, ) 
-> PandasIndex: new_pd_index = cls._concat_indexes(indexes, dim, positions) @@ -532,8 +537,11 @@ def rename(self, name_dict, dims_dict): new_dim = dims_dict.get(self.dim, self.dim) return self._replace(index, dim=new_dim) - def copy(self, deep=True): + def _copy( + self: T_PandasIndex, deep: bool = True, memo: dict[int, Any] | None = None + ) -> T_PandasIndex: if deep: + # pandas is not using the memo index = self.index.copy(deep=True) else: # index will be copied in constructor @@ -656,7 +664,7 @@ def concat( # type: ignore[override] cls, indexes: Sequence[PandasMultiIndex], dim: Hashable, - positions: Iterable[Iterable[int]] = None, + positions: Iterable[Iterable[int]] | None = None, ) -> PandasMultiIndex: new_pd_index = cls._concat_indexes(indexes, dim, positions) @@ -1265,11 +1273,19 @@ def to_pandas_indexes(self) -> Indexes[pd.Index]: return Indexes(indexes, self._variables) def copy_indexes( - self, deep: bool = True + self, deep: bool = True, memo: dict[int, Any] | None = None ) -> tuple[dict[Hashable, T_PandasOrXarrayIndex], dict[Hashable, Variable]]: """Return a new dictionary with copies of indexes, preserving unique indexes. + Parameters + ---------- + deep : bool, default: True + Whether the indexes are deep or shallow copied onto the new object. + memo : dict if object id to copied objects or None, optional + To prevent infinite recursion deepcopy stores all copied elements + in this dict. + """ new_indexes = {} new_index_vars = {} @@ -1285,7 +1301,7 @@ def copy_indexes( else: convert_new_idx = False - new_idx = idx.copy(deep=deep) + new_idx = idx._copy(deep=deep, memo=memo) idx_vars = idx.create_variables(coords) if convert_new_idx: @@ -1346,7 +1362,7 @@ def indexes_equal( other_index: Index, variable: Variable, other_variable: Variable, - cache: dict[tuple[int, int], bool | None] = None, + cache: dict[tuple[int, int], bool | None] | None = None, ) -> bool: """Check if two indexes are equal, possibly with cached results. 
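The `Index.copy`/`_copy` and `copy_indexes` changes above thread the `deep` flag and a deepcopy memo through index copies. A small sketch of the user-facing behaviour this supports; the dataset contents are illustrative only:

    import copy
    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"a": ("x", np.arange(3.0))}, coords={"x": [10, 20, 30]})

    shallow = ds.copy(deep=False)   # index variables are shallow-copied as well
    deep = copy.deepcopy(ds)        # deepcopy now routes through _copy(deep=True, memo=...)

    assert (deep["x"] == ds["x"]).all()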
diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 6b33741633d..8dd2d1a0ead 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -180,7 +180,7 @@ def map_index_queries( # forward dimension indexers with no index/coordinate results.append(IndexSelResult(labels)) else: - results.append(index.sel(labels, **options)) # type: ignore[call-arg] + results.append(index.sel(labels, **options)) merged = merge_sel_results(results) @@ -1422,7 +1422,7 @@ def __init__(self, array: pd.Index, dtype: DTypeLike = None): if dtype is None: self._dtype = get_valid_numpy_dtype(array) else: - self._dtype = np.dtype(dtype) # type: ignore[assignment] + self._dtype = np.dtype(dtype) @property def dtype(self) -> np.dtype: diff --git a/xarray/core/merge.py b/xarray/core/merge.py index c2efcc791a1..859b3aeff8f 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -40,9 +40,9 @@ ArrayLike = Any VariableLike = Union[ ArrayLike, - tuple[DimsLike, ArrayLike], - tuple[DimsLike, ArrayLike, Mapping], - tuple[DimsLike, ArrayLike, Mapping, Mapping], + Tuple[DimsLike, ArrayLike], + Tuple[DimsLike, ArrayLike, Mapping], + Tuple[DimsLike, ArrayLike, Mapping, Mapping], ] XarrayValue = Union[DataArray, Variable, VariableLike] DatasetLike = Union[Dataset, Mapping[Any, XarrayValue]] @@ -207,7 +207,7 @@ def _assert_prioritized_valid( def merge_collected( grouped: dict[Hashable, list[MergeElement]], - prioritized: Mapping[Any, MergeElement] = None, + prioritized: Mapping[Any, MergeElement] | None = None, compat: CompatOptions = "minimal", combine_attrs: CombineAttrsOptions = "override", equals: dict[Hashable, bool] | None = None, @@ -391,7 +391,7 @@ def collect_from_coordinates( def merge_coordinates_without_align( objects: list[Coordinates], - prioritized: Mapping[Any, MergeElement] = None, + prioritized: Mapping[Any, MergeElement] | None = None, exclude_dims: AbstractSet = frozenset(), combine_attrs: CombineAttrsOptions = "override", ) -> tuple[dict[Hashable, Variable], dict[Hashable, Index]]: diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 7390c8971fc..93423a4beff 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -308,12 +308,12 @@ def get_clean_interp_index( def interp_na( self, - dim: Hashable = None, + dim: Hashable | None = None, use_coordinate: bool | str = True, method: InterpOptions = "linear", - limit: int = None, + limit: int | None = None, max_gap: int | float | str | pd.Timedelta | np.timedelta64 | dt.timedelta = None, - keep_attrs: bool = None, + keep_attrs: bool | None = None, **kwargs, ): """Interpolate values according to different methods.""" diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index 50f3c474f38..0ef428e3d96 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -157,7 +157,7 @@ def map_blocks( func: Callable[..., T_Xarray], obj: DataArray | Dataset, args: Sequence[Any] = (), - kwargs: Mapping[str, Any] = None, + kwargs: Mapping[str, Any] | None = None, template: DataArray | Dataset | None = None, ) -> T_Xarray: """Apply a function to each block of a DataArray or Dataset. 
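For the `map_blocks` signature touched above (`kwargs` is now `Mapping | None`), a minimal usage sketch; it assumes dask is installed, and the function, chunk size and factor are made up for the example:

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"a": ("x", np.arange(8.0))}).chunk({"x": 4})

    def scale(block, factor=1.0):
        # applied independently to every block of the chunked Dataset
        return block * factor

    # kwargs defaults to None; extra keyword arguments go in as an explicit dict
    result = xr.map_blocks(scale, ds, kwargs={"factor": 2.0}, template=ds)
    result.compute()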
diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py index 0a6537fe909..474b694dcf0 100644 --- a/xarray/core/pycompat.py +++ b/xarray/core/pycompat.py @@ -1,7 +1,8 @@ from __future__ import annotations from importlib import import_module -from typing import Any, Literal +from types import ModuleType +from typing import TYPE_CHECKING, Any, Literal, Tuple, Type import numpy as np from packaging.version import Version @@ -10,7 +11,9 @@ integer_types = (int, np.integer) -ModType = Literal["dask", "pint", "cupy", "sparse"] +if TYPE_CHECKING: + ModType = Literal["dask", "pint", "cupy", "sparse"] + DuckArrayTypes = Tuple[Type[Any], ...] # TODO: improve this? maybe Generic class DuckArrayModule: @@ -21,12 +24,15 @@ class DuckArrayModule: https://github.com/pydata/xarray/pull/5561#discussion_r664815718 """ - module: ModType | None + module: ModuleType | None version: Version - type: tuple[type[Any]] # TODO: improve this? maybe Generic + type: DuckArrayTypes available: bool def __init__(self, mod: ModType) -> None: + duck_array_module: ModuleType | None = None + duck_array_version: Version + duck_array_type: DuckArrayTypes try: duck_array_module = import_module(mod) duck_array_version = Version(duck_array_module.__version__) @@ -53,7 +59,7 @@ def __init__(self, mod: ModType) -> None: self.available = duck_array_module is not None -def array_type(mod: ModType) -> tuple[type[Any]]: +def array_type(mod: ModType) -> DuckArrayTypes: """Quick wrapper to get the array class of the module.""" return DuckArrayModule(mod).type diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 29f84231e13..b9800a99d4a 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -47,7 +47,7 @@ def __init__( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, keep_attrs: bool | None = None, **kwargs, ) -> T_Xarray: @@ -346,7 +346,7 @@ def apply(self, func, args=(), shortcut=None, **kwargs): def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index 0c50ce5587b..9aa2d792031 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -38,21 +38,27 @@ from __future__ import annotations import datetime +import typing import numpy as np import pandas as pd from ..coding.cftime_offsets import ( - CFTIME_TICKS, + BaseCFTimeOffset, Day, MonthEnd, QuarterEnd, + Tick, YearEnd, cftime_range, normalize_date, to_offset, ) from ..coding.cftimeindex import CFTimeIndex +from .types import SideOptions + +if typing.TYPE_CHECKING: + from .types import CFTimeDatetime class CFTimeGrouper: @@ -60,25 +66,77 @@ class CFTimeGrouper: single method, the only one required for resampling in xarray. 
It cannot be used in a call to groupby like a pandas.Grouper object can.""" - def __init__(self, freq, closed=None, label=None, base=0, loffset=None): + def __init__( + self, + freq: str | BaseCFTimeOffset, + closed: SideOptions | None = None, + label: SideOptions | None = None, + base: int | None = None, + loffset: str | datetime.timedelta | BaseCFTimeOffset | None = None, + origin: str | CFTimeDatetime = "start_day", + offset: str | datetime.timedelta | None = None, + ): + self.offset: datetime.timedelta | None + self.closed: SideOptions + self.label: SideOptions + + if base is not None and offset is not None: + raise ValueError("base and offset cannot be provided at the same time") + self.freq = to_offset(freq) - self.closed = closed - self.label = label - self.base = base self.loffset = loffset + self.origin = origin if isinstance(self.freq, (MonthEnd, QuarterEnd, YearEnd)): - if self.closed is None: + if closed is None: self.closed = "right" - if self.label is None: + else: + self.closed = closed + if label is None: self.label = "right" + else: + self.label = label + else: + # The backward resample sets ``closed`` to ``'right'`` by default + # since the last value should be considered as the edge point for + # the last bin. When origin in "end" or "end_day", the value for a + # specific ``cftime.datetime`` index stands for the resample result + # from the current ``cftime.datetime`` minus ``freq`` to the current + # ``cftime.datetime`` with a right close. + if self.origin in ["end", "end_day"]: + if closed is None: + self.closed = "right" + else: + self.closed = closed + if label is None: + self.label = "right" + else: + self.label = label + else: + if closed is None: + self.closed = "left" + else: + self.closed = closed + if label is None: + self.label = "left" + else: + self.label = label + + if base is not None and isinstance(self.freq, Tick): + offset = type(self.freq)(n=base % self.freq.n).as_timedelta() + + if offset is not None: + try: + self.offset = _convert_offset_to_timedelta(offset) + except (ValueError, AttributeError) as error: + raise ValueError( + f"offset must be a datetime.timedelta object or an offset string " + f"that can be converted to a timedelta. Got {offset} instead." + ) from error else: - if self.closed is None: - self.closed = "left" - if self.label is None: - self.label = "left" + self.offset = None - def first_items(self, index): + def first_items(self, index: CFTimeIndex): """Meant to reproduce the results of the following grouper = pandas.Grouper(...) @@ -89,7 +147,7 @@ def first_items(self, index): """ datetime_bins, labels = _get_time_bins( - index, self.freq, self.closed, self.label, self.base + index, self.freq, self.closed, self.label, self.origin, self.offset ) if self.loffset is not None: labels = labels + pd.to_timedelta(self.loffset) @@ -110,7 +168,14 @@ def first_items(self, index): return first_items.where(non_duplicate), codes -def _get_time_bins(index, freq, closed, label, base): +def _get_time_bins( + index: CFTimeIndex, + freq: BaseCFTimeOffset, + closed: SideOptions, + label: SideOptions, + origin: str | CFTimeDatetime, + offset: datetime.timedelta | None, +): """Obtain the bins and their respective labels for resampling operations. Parameters @@ -121,18 +186,26 @@ def _get_time_bins(index, freq, closed, label, base): The offset object representing target conversion a.k.a. resampling frequency (e.g., 'MS', '2D', 'H', or '3T' with coding.cftime_offsets.to_offset() applied to it). 
- closed : 'left' or 'right', optional + closed : 'left' or 'right' Which side of bin interval is closed. The default is 'left' for all frequency offsets except for 'M' and 'A', which have a default of 'right'. - label : 'left' or 'right', optional + label : 'left' or 'right' Which bin edge label to label bucket with. The default is 'left' for all frequency offsets except for 'M' and 'A', which have a default of 'right'. - base : int, optional - For frequencies that evenly subdivide 1 day, the "origin" of the - aggregated intervals. For example, for '5min' frequency, base could - range from 0 through 4. Defaults to 0. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'} or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : datetime.timedelta, default is None + An offset timedelta added to the origin. Returns ------- @@ -153,7 +226,7 @@ def _get_time_bins(index, freq, closed, label, base): return datetime_bins, labels first, last = _get_range_edges( - index.min(), index.max(), freq, closed=closed, base=base + index.min(), index.max(), freq, closed=closed, origin=origin, offset=offset ) datetime_bins = labels = cftime_range( freq=freq, start=first, end=last, name=index.name @@ -171,7 +244,13 @@ def _get_time_bins(index, freq, closed, label, base): return datetime_bins, labels -def _adjust_bin_edges(datetime_bins, offset, closed, index, labels): +def _adjust_bin_edges( + datetime_bins: np.ndarray, + freq: BaseCFTimeOffset, + closed: SideOptions, + index: CFTimeIndex, + labels: np.ndarray, +): """This is required for determining the bin edges resampling with daily frequencies greater than one day, month end, and year end frequencies. @@ -206,8 +285,8 @@ def _adjust_bin_edges(datetime_bins, offset, closed, index, labels): This is also required for daily frequencies longer than one day and year-end frequencies. """ - is_super_daily = isinstance(offset, (MonthEnd, QuarterEnd, YearEnd)) or ( - isinstance(offset, Day) and offset.n > 1 + is_super_daily = isinstance(freq, (MonthEnd, QuarterEnd, YearEnd)) or ( + isinstance(freq, Day) and freq.n > 1 ) if is_super_daily: if closed == "right": @@ -219,7 +298,14 @@ def _adjust_bin_edges(datetime_bins, offset, closed, index, labels): return datetime_bins, labels -def _get_range_edges(first, last, offset, closed="left", base=0): +def _get_range_edges( + first: CFTimeDatetime, + last: CFTimeDatetime, + freq: BaseCFTimeOffset, + closed: SideOptions = "left", + origin: str | CFTimeDatetime = "start_day", + offset: datetime.timedelta | None = None, +): """Get the correct starting and ending datetimes for the resampled CFTimeIndex range. @@ -231,16 +317,24 @@ def _get_range_edges(first, last, offset, closed="left", base=0): last : cftime.datetime Uncorrected ending datetime object for resampled CFTimeIndex range. Usually the max of the original CFTimeIndex. - offset : xarray.coding.cftime_offsets.BaseCFTimeOffset + freq : xarray.coding.cftime_offsets.BaseCFTimeOffset The offset object representing target conversion a.k.a. resampling frequency. Contains information on offset type (e.g. 
Day or 'D') and offset magnitude (e.g., n = 3). - closed : 'left' or 'right', optional + closed : 'left' or 'right' Which side of bin interval is closed. Defaults to 'left'. - base : int, optional - For frequencies that evenly subdivide 1 day, the "origin" of the - aggregated intervals. For example, for '5min' frequency, base could - range from 0 through 4. Defaults to 0. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'} or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : datetime.timedelta, default is None + An offset timedelta added to the origin. Returns ------- @@ -249,21 +343,28 @@ def _get_range_edges(first, last, offset, closed="left", base=0): last : cftime.datetime Corrected ending datetime object for resampled CFTimeIndex range. """ - if isinstance(offset, CFTIME_TICKS): + if isinstance(freq, Tick): first, last = _adjust_dates_anchored( - first, last, offset, closed=closed, base=base + first, last, freq, closed=closed, origin=origin, offset=offset ) return first, last else: first = normalize_date(first) last = normalize_date(last) - first = offset.rollback(first) if closed == "left" else first - offset - last = last + offset + first = freq.rollback(first) if closed == "left" else first - freq + last = last + freq return first, last -def _adjust_dates_anchored(first, last, offset, closed="right", base=0): +def _adjust_dates_anchored( + first: CFTimeDatetime, + last: CFTimeDatetime, + freq: Tick, + closed: SideOptions = "right", + origin: str | CFTimeDatetime = "start_day", + offset: datetime.timedelta | None = None, +): """First and last offsets should be calculated from the start day to fix an error cause by resampling across multiple days when a one day period is not a multiple of the frequency. @@ -275,16 +376,24 @@ def _adjust_dates_anchored(first, last, offset, closed="right", base=0): A datetime object representing the start of a CFTimeIndex range. last : cftime.datetime A datetime object representing the end of a CFTimeIndex range. - offset : xarray.coding.cftime_offsets.BaseCFTimeOffset + freq : xarray.coding.cftime_offsets.BaseCFTimeOffset The offset object representing target conversion a.k.a. resampling frequency. Contains information on offset type (e.g. Day or 'D') and offset magnitude (e.g., n = 3). - closed : 'left' or 'right', optional + closed : 'left' or 'right' Which side of bin interval is closed. Defaults to 'right'. - base : int, optional - For frequencies that evenly subdivide 1 day, the "origin" of the - aggregated intervals. For example, for '5min' frequency, base could - range from 0 through 4. Defaults to 0. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'} or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. 
+ + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : datetime.timedelta, default is None + An offset timedelta added to the origin. Returns ------- @@ -295,33 +404,59 @@ def _adjust_dates_anchored(first, last, offset, closed="right", base=0): A datetime object representing the end of a date range that has been adjusted to fix resampling errors. """ + import cftime + + if origin == "start_day": + origin_date = normalize_date(first) + elif origin == "start": + origin_date = first + elif origin == "epoch": + origin_date = type(first)(1970, 1, 1) + elif origin in ["end", "end_day"]: + origin_last = last if origin == "end" else _ceil_via_cftimeindex(last, "D") + sub_freq_times = (origin_last - first) // freq.as_timedelta() + if closed == "left": + sub_freq_times += 1 + first = origin_last - sub_freq_times * freq + origin_date = first + elif isinstance(origin, cftime.datetime): + origin_date = origin + else: + raise ValueError( + f"origin must be one of {{'epoch', 'start_day', 'start', 'end', 'end_day'}} " + f"or a cftime.datetime object. Got {origin}." + ) + + if offset is not None: + origin_date = origin_date + offset + + foffset = (first - origin_date) % freq.as_timedelta() + loffset = (last - origin_date) % freq.as_timedelta() - base = base % offset.n - start_day = normalize_date(first) - base_td = type(offset)(n=base).as_timedelta() - start_day += base_td - foffset = exact_cftime_datetime_difference(start_day, first) % offset.as_timedelta() - loffset = exact_cftime_datetime_difference(start_day, last) % offset.as_timedelta() if closed == "right": if foffset.total_seconds() > 0: fresult = first - foffset else: - fresult = first - offset.as_timedelta() + fresult = first - freq.as_timedelta() if loffset.total_seconds() > 0: - lresult = last + (offset.as_timedelta() - loffset) + lresult = last + (freq.as_timedelta() - loffset) else: lresult = last else: - fresult = first - foffset if foffset.total_seconds() > 0 else first + if foffset.total_seconds() > 0: + fresult = first - foffset + else: + fresult = first + if loffset.total_seconds() > 0: - lresult = last + (offset.as_timedelta() - loffset) + lresult = last + (freq.as_timedelta() - loffset) else: - lresult = last + offset.as_timedelta() + lresult = last + freq return fresult, lresult -def exact_cftime_datetime_difference(a, b): +def exact_cftime_datetime_difference(a: CFTimeDatetime, b: CFTimeDatetime): """Exact computation of b - a Assumes: @@ -359,3 +494,19 @@ def exact_cftime_datetime_difference(a, b): seconds = int(round(seconds.total_seconds())) microseconds = b.microsecond - a.microsecond return datetime.timedelta(seconds=seconds, microseconds=microseconds) + + +def _convert_offset_to_timedelta( + offset: datetime.timedelta | str | BaseCFTimeOffset, +) -> datetime.timedelta: + if isinstance(offset, datetime.timedelta): + return offset + elif isinstance(offset, (str, Tick)): + return to_offset(offset).as_timedelta() + else: + raise ValueError + + +def _ceil_via_cftimeindex(date: CFTimeDatetime, freq: str | BaseCFTimeOffset): + index = CFTimeIndex([date]) + return index.ceil(freq).item() diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 57a5456b70a..f7a573019ae 100644 --- a/xarray/core/rolling.py +++ 
b/xarray/core/rolling.py @@ -1002,7 +1002,7 @@ def _reduce_method( kwargs["skipna"] = None def wrapped_func( - self: DataArrayCoarsen, keep_attrs: bool = None, **kwargs + self: DataArrayCoarsen, keep_attrs: bool | None = None, **kwargs ) -> DataArray: from .dataarray import DataArray @@ -1033,7 +1033,9 @@ def wrapped_func( return wrapped_func - def reduce(self, func: Callable, keep_attrs: bool = None, **kwargs) -> DataArray: + def reduce( + self, func: Callable, keep_attrs: bool | None = None, **kwargs + ) -> DataArray: """Reduce the items in this group by applying `func` along some dimension(s). @@ -1088,7 +1090,7 @@ def _reduce_method( kwargs["skipna"] = None def wrapped_func( - self: DatasetCoarsen, keep_attrs: bool = None, **kwargs + self: DatasetCoarsen, keep_attrs: bool | None = None, **kwargs ) -> Dataset: from .dataset import Dataset diff --git a/xarray/core/types.py b/xarray/core/types.py index 2b65f4d23e6..adf046dabb2 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -1,20 +1,24 @@ from __future__ import annotations +import datetime from typing import ( TYPE_CHECKING, Any, Callable, Hashable, Iterable, + List, Literal, Protocol, Sequence, SupportsIndex, + Tuple, TypeVar, Union, ) import numpy as np +import pandas as pd from packaging.version import Version if TYPE_CHECKING: @@ -70,17 +74,21 @@ def dtype(self) -> np.dtype: # character codes, type strings or comma-separated fields, e.g., 'float64' str, # (flexible_dtype, itemsize) - tuple[_DTypeLikeNested, int], + Tuple[_DTypeLikeNested, int], # (fixed_dtype, shape) - tuple[_DTypeLikeNested, _ShapeLike], + Tuple[_DTypeLikeNested, _ShapeLike], # (base_dtype, new_dtype) - tuple[_DTypeLikeNested, _DTypeLikeNested], + Tuple[_DTypeLikeNested, _DTypeLikeNested], # because numpy does the same? - list[Any], + List[Any], # anything with a dtype attribute _SupportsDType, ] - + try: + from cftime import datetime as CFTimeDatetime + except ImportError: + CFTimeDatetime = Any + DatetimeLike = Union[pd.Timestamp, datetime.datetime, np.datetime64, CFTimeDatetime] else: Self: Any = None DTypeLikeSave: Any = None @@ -105,7 +113,8 @@ def dtype(self) -> np.dtype: VarCompatible = Union["Variable", "ScalarOrArray"] GroupByIncompatible = Union["Variable", "GroupBy"] -Dims = Union[str, Iterable[Hashable], None] +Dims = Union[str, Iterable[Hashable], "ellipsis", None] +OrderedDims = Union[str, Sequence[Union[Hashable, "ellipsis"]], "ellipsis", None] ErrorOptions = Literal["raise", "ignore"] ErrorOptionsWithWarn = Literal["raise", "warn", "ignore"] diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 60d2d65f068..7ecb73049d1 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -1,9 +1,44 @@ """Internal utilities; not for external use""" +# Some functions in this module are derived from functions in pandas. For +# reference, here is a copy of the pandas copyright notice: + +# BSD 3-Clause License + +# Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team +# All rights reserved. + +# Copyright (c) 2011-2022, Open source contributors. + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: + +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+ +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. + +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. + +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from __future__ import annotations import contextlib import functools import importlib +import inspect import io import itertools import math @@ -22,9 +57,11 @@ Hashable, Iterable, Iterator, + Literal, Mapping, MutableMapping, MutableSet, + Sequence, TypeVar, cast, overload, @@ -34,7 +71,7 @@ import pandas as pd if TYPE_CHECKING: - from .types import ErrorOptionsWithWarn + from .types import Dims, ErrorOptionsWithWarn, OrderedDims K = TypeVar("K") V = TypeVar("V") @@ -472,7 +509,7 @@ class OrderedSet(MutableSet[T]): __slots__ = ("_d",) - def __init__(self, values: Iterable[T] = None): + def __init__(self, values: Iterable[T] | None = None): self._d = {} if values is not None: self.update(values) @@ -619,15 +656,11 @@ def read_magic_number_from_file(filename_or_obj, count=8) -> bytes: magic_number = filename_or_obj[:count] elif isinstance(filename_or_obj, io.IOBase): if filename_or_obj.tell() != 0: - raise ValueError( - "cannot guess the engine, " - "file-like object read/write pointer not at the start of the file, " - "please close and reopen, or use a context manager" - ) + filename_or_obj.seek(0) magic_number = filename_or_obj.read(count) filename_or_obj.seek(0) else: - raise TypeError(f"cannot read the magic number form {type(filename_or_obj)}") + raise TypeError(f"cannot read the magic number from {type(filename_or_obj)}") return magic_number @@ -852,15 +885,17 @@ def drop_dims_from_indexers( def drop_missing_dims( - supplied_dims: Collection, dims: Collection, missing_dims: ErrorOptionsWithWarn -) -> Collection: + supplied_dims: Iterable[Hashable], + dims: Iterable[Hashable], + missing_dims: ErrorOptionsWithWarn, +) -> Iterable[Hashable]: """Depending on the setting of missing_dims, drop any dimensions from supplied_dims that are not present in dims. Parameters ---------- - supplied_dims : dict - dims : sequence + supplied_dims : Iterable of Hashable + dims : Iterable of Hashable missing_dims : {"raise", "warn", "ignore"} """ @@ -893,6 +928,158 @@ def drop_missing_dims( ) +T_None = TypeVar("T_None", None, "ellipsis") + + +@overload +def parse_dims( + dim: str | Iterable[Hashable] | T_None, + all_dims: tuple[Hashable, ...], + *, + check_exists: bool = True, + replace_none: Literal[True] = True, +) -> tuple[Hashable, ...]: + ... 
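(Illustration, not part of the patch: a sketch of what the ``parse_dims`` and ``parse_ordered_dims`` helpers introduced in this hunk are expected to return, based on their docstrings and overloads; the concrete results shown in the comments are assumptions drawn from that documented behaviour.)

from xarray.core.utils import parse_dims, parse_ordered_dims

all_dims = ("x", "y", "z")

parse_dims("x", all_dims)  # ("x",) -- a single str becomes a 1-tuple
parse_dims(None, all_dims)  # ("x", "y", "z") since replace_none defaults to True
parse_dims(None, all_dims, replace_none=False)  # None is passed through unchanged

# In the ordered variant, an ellipsis expands to the remaining dims in order:
parse_ordered_dims(["z", ...], all_dims)  # ("z", "x", "y")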
+ + +@overload +def parse_dims( + dim: str | Iterable[Hashable] | T_None, + all_dims: tuple[Hashable, ...], + *, + check_exists: bool = True, + replace_none: Literal[False], +) -> tuple[Hashable, ...] | T_None: + ... + + +def parse_dims( + dim: Dims, + all_dims: tuple[Hashable, ...], + *, + check_exists: bool = True, + replace_none: bool = True, +) -> tuple[Hashable, ...] | None | ellipsis: + """Parse one or more dimensions. + + A single dimension must always be a str, multiple dimensions + can be Hashables. This supports e.g. using a tuple as a dimension. + If you supply e.g. a set of dimensions the order cannot be + preserved, but for sequences it will be. + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None + Dimension(s) to parse. + all_dims : tuple of Hashable + All possible dimensions. + check_exists: bool, default: True + If True, check if dim is a subset of all_dims. + replace_none : bool, default: True + If True, return all_dims if dim is None or "...". + + Returns + ------- + parsed_dims : tuple of Hashable + Input dimensions as a tuple. + """ + if dim is None or dim is ...: + if replace_none: + return all_dims + return dim + if isinstance(dim, str): + dim = (dim,) + if check_exists: + _check_dims(set(dim), set(all_dims)) + return tuple(dim) + + +@overload +def parse_ordered_dims( + dim: str | Sequence[Hashable | ellipsis] | T_None, + all_dims: tuple[Hashable, ...], + *, + check_exists: bool = True, + replace_none: Literal[True] = True, +) -> tuple[Hashable, ...]: + ... + + +@overload +def parse_ordered_dims( + dim: str | Sequence[Hashable | ellipsis] | T_None, + all_dims: tuple[Hashable, ...], + *, + check_exists: bool = True, + replace_none: Literal[False], +) -> tuple[Hashable, ...] | T_None: + ... + + +def parse_ordered_dims( + dim: OrderedDims, + all_dims: tuple[Hashable, ...], + *, + check_exists: bool = True, + replace_none: bool = True, +) -> tuple[Hashable, ...] | None | ellipsis: + """Parse one or more dimensions. + + A single dimension must always be a str, multiple dimensions + can be Hashables. This supports e.g. using a tuple as a dimension. + An ellipsis ("...") in a sequence of dimensions will be + replaced with all remaining dimensions. This only makes sense when + the input is a sequence and not e.g. a set. + + Parameters + ---------- + dim : str, Sequence of Hashable or "...", "..." or None + Dimension(s) to parse. If "..." appears in a Sequence + it always gets replaced with all remaining dims. + all_dims : tuple of Hashable + All possible dimensions. + check_exists: bool, default: True + If True, check if dim is a subset of all_dims. + replace_none : bool, default: True + If True, return all_dims if dim is None. + + Returns + ------- + parsed_dims : tuple of Hashable + Input dimensions as a tuple. + """ + if dim is not None and dim is not ... and not isinstance(dim, str) and ... in dim: + dims_set: set[Hashable | ellipsis] = set(dim) + all_dims_set = set(all_dims) + if check_exists: + _check_dims(dims_set, all_dims_set) + if len(all_dims_set) != len(all_dims): + raise ValueError("Cannot use ellipsis with repeated dims") + dims = tuple(dim) + if dims.count(...) > 1: + raise ValueError("More than one ellipsis supplied") + other_dims = tuple(d for d in all_dims if d not in dims_set) + idx = dims.index(...) + return dims[:idx] + other_dims + dims[idx + 1 :] + else: + # mypy cannot resolve that the sequence cannot contain "..." 
+ return parse_dims( # type: ignore[call-overload] + dim=dim, + all_dims=all_dims, + check_exists=check_exists, + replace_none=replace_none, + ) + + +def _check_dims(dim: set[Hashable | ellipsis], all_dims: set[Hashable]) -> None: + wrong_dims = dim - all_dims + if wrong_dims and wrong_dims != {...}: + wrong_dims_str = ", ".join(f"'{d!s}'" for d in wrong_dims) + raise ValueError( + f"Dimension(s) {wrong_dims_str} do not exist. Expected one or more of {all_dims}" + ) + + _Accessor = TypeVar("_Accessor") @@ -972,3 +1159,46 @@ def module_available(module: str) -> bool: Whether the module is installed. """ return importlib.util.find_spec(module) is not None + + +def find_stack_level(test_mode=False) -> int: + """Find the first place in the stack that is not inside xarray. + + This is unless the code emanates from a test, in which case we would prefer + to see the xarray source. + + This function is taken from pandas. + + Parameters + ---------- + test_mode : bool + Flag used for testing purposes to switch off the detection of test + directories in the stack trace. + + Returns + ------- + stacklevel : int + First level in the stack that is not part of xarray. + """ + import xarray as xr + + pkg_dir = os.path.dirname(xr.__file__) + test_dir = os.path.join(pkg_dir, "tests") + + # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow + frame = inspect.currentframe() + n = 0 + while frame: + fname = inspect.getfile(frame) + if fname.startswith(pkg_dir) and (not fname.startswith(test_dir) or test_mode): + frame = frame.f_back + n += 1 + else: + break + return n + + +def emit_user_level_warning(message, category=None): + """Emit a warning at the user level by inspecting the stack trace.""" + stacklevel = find_stack_level() + warnings.warn(message, category=category, stacklevel=stacklevel) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 1e14e8dc38e..bb988392f50 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -71,7 +71,10 @@ NON_NANOSECOND_WARNING = ( "Converting non-nanosecond precision {case} values to nanosecond precision. " "This behavior can eventually be relaxed in xarray, as it is an artifact from " - "pandas which is now beginning to support non-nanosecond precision values." + "pandas which is now beginning to support non-nanosecond precision values. " + "This warning is caused by passing non-nanosecond np.datetime64 or " + "np.timedelta64 values to the DataArray or Variable constructor; it can be " + "silenced by converting the values to nanosecond precision ahead of time." ) @@ -191,14 +194,14 @@ def _as_nanosecond_precision(data): isinstance(dtype, pd.DatetimeTZDtype) and dtype.unit != "ns" ) if non_ns_datetime64 or non_ns_datetime_tz_dtype: - warnings.warn(NON_NANOSECOND_WARNING.format(case="datetime")) + utils.emit_user_level_warning(NON_NANOSECOND_WARNING.format(case="datetime")) if isinstance(dtype, pd.DatetimeTZDtype): nanosecond_precision_dtype = pd.DatetimeTZDtype("ns", dtype.tz) else: nanosecond_precision_dtype = "datetime64[ns]" return data.astype(nanosecond_precision_dtype) elif dtype.kind == "m" and dtype != np.dtype("timedelta64[ns]"): - warnings.warn(NON_NANOSECOND_WARNING.format(case="timedelta")) + utils.emit_user_level_warning(NON_NANOSECOND_WARNING.format(case="timedelta")) return data.astype("timedelta64[ns]") else: return data @@ -1136,7 +1139,7 @@ def chunk( | tuple[tuple[int, ...], ...] 
| Mapping[Any, None | int | tuple[int, ...]] ) = {}, - name: str = None, + name: str | None = None, lock: bool = False, inline_array: bool = False, **chunks_kwargs: Any, @@ -1285,7 +1288,7 @@ def _to_dense(self): def isel( self: T_Variable, - indexers: Mapping[Any, Any] = None, + indexers: Mapping[Any, Any] | None = None, missing_dims: ErrorOptionsWithWarn = "raise", **indexers_kwargs: Any, ) -> T_Variable: @@ -1506,7 +1509,7 @@ def pad( if reflect_type is not None: pad_option_kwargs["reflect_type"] = reflect_type - array = np.pad( # type: ignore[call-overload] + array = np.pad( self.data.astype(dtype, copy=False), pad_width_by_index, mode=mode, @@ -1886,7 +1889,7 @@ def clip(self, min=None, max=None): def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, keepdims: bool = False, @@ -2119,9 +2122,9 @@ def quantile( q: ArrayLike, dim: str | Sequence[Hashable] | None = None, method: QuantileMethods = "linear", - keep_attrs: bool = None, - skipna: bool = None, - interpolation: QuantileMethods = None, + keep_attrs: bool | None = None, + skipna: bool | None = None, + interpolation: QuantileMethods | None = None, ) -> Variable: """Compute the qth quantile of the data along the specified dimension. @@ -2519,7 +2522,7 @@ def coarsen_reshape(self, windows, boundary, side): return variable.data.reshape(shape), tuple(axes) - def isnull(self, keep_attrs: bool = None): + def isnull(self, keep_attrs: bool | None = None): """Test each value in the array for whether it is a missing value. Returns @@ -2553,7 +2556,7 @@ def isnull(self, keep_attrs: bool = None): keep_attrs=keep_attrs, ) - def notnull(self, keep_attrs: bool = None): + def notnull(self, keep_attrs: bool | None = None): """Test each value in the array for whether it is not a missing value. Returns @@ -2660,7 +2663,7 @@ def _to_numeric(self, offset=None, datetime_unit=None, dtype=float): def _unravel_argminmax( self, argminmax: str, - dim: Dims | ellipsis, + dim: Dims, axis: int | None, keep_attrs: bool | None, skipna: bool | None, @@ -2729,10 +2732,10 @@ def _unravel_argminmax( def argmin( self, - dim: Dims | ellipsis = None, - axis: int = None, - keep_attrs: bool = None, - skipna: bool = None, + dim: Dims = None, + axis: int | None = None, + keep_attrs: bool | None = None, + skipna: bool | None = None, ) -> Variable | dict[Hashable, Variable]: """Index or indices of the minimum of the Variable over one or more dimensions. If a sequence is passed to 'dim', then result returned as dict of Variables, @@ -2774,10 +2777,10 @@ def argmin( def argmax( self, - dim: Dims | ellipsis = None, - axis: int = None, - keep_attrs: bool = None, - skipna: bool = None, + dim: Dims = None, + axis: int | None = None, + keep_attrs: bool | None = None, + skipna: bool | None = None, ) -> Variable | dict[Hashable, Variable]: """Index or indices of the maximum of the Variable over one or more dimensions. 
If a sequence is passed to 'dim', then result returned as dict of Variables, @@ -2996,7 +2999,7 @@ def _data_equals(self, other): def to_index_variable(self) -> IndexVariable: """Return this variable as an xarray.IndexVariable""" - return self.copy() + return self.copy(deep=False) to_coord = utils.alias(to_index_variable, "to_coord") diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index fafe8188792..0f3a9aa3432 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -207,7 +207,7 @@ def _check_dim(self, dim: Dims): def _reduce( da: DataArray, weights: DataArray, - dim: Dims | ellipsis = None, + dim: Dims = None, skipna: bool | None = None, ) -> DataArray: """reduce using dot; equivalent to (da * weights).sum(dim, skipna) @@ -312,7 +312,7 @@ def _weighted_quantile( da: DataArray, q: ArrayLike, dim: Dims = None, - skipna: bool = None, + skipna: bool | None = None, ) -> DataArray: """Apply a weighted ``quantile`` to a DataArray along some dimension(s).""" @@ -516,7 +516,7 @@ def quantile( q: ArrayLike, *, dim: Dims = None, - keep_attrs: bool = None, + keep_attrs: bool | None = None, skipna: bool = True, ) -> T_Xarray: diff --git a/xarray/plot/dataarray_plot.py b/xarray/plot/dataarray_plot.py index ac23f7dc96d..46aaf33f683 100644 --- a/xarray/plot/dataarray_plot.py +++ b/xarray/plot/dataarray_plot.py @@ -1488,7 +1488,7 @@ def newplotfunc( if ax is None: # TODO: Importing Axes3D is no longer necessary in matplotlib >= 3.2. # Remove when minimum requirement of matplotlib is 3.2: - from mpl_toolkits.mplot3d import Axes3D # type: ignore # noqa: F401 + from mpl_toolkits.mplot3d import Axes3D # noqa: F401 # delete so it does not end up in locals() del Axes3D @@ -1521,7 +1521,7 @@ def newplotfunc( and not kwargs.get("_is_facetgrid", False) and ax is not None ): - import mpl_toolkits # type: ignore + import mpl_toolkits if not isinstance(ax, mpl_toolkits.mplot3d.Axes3D): raise ValueError( diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 31daff58b55..ba5ea736bbd 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -51,8 +51,8 @@ ROBUST_PERCENTILE = 2.0 # copied from seaborn -_MARKERSIZE_RANGE = (18.0, 72.0) -_LINEWIDTH_RANGE = (1.5, 6.0) +_MARKERSIZE_RANGE = (18.0, 36.0, 72.0) +_LINEWIDTH_RANGE = (1.5, 1.5, 6.0) def import_matplotlib_pyplot(): @@ -609,8 +609,8 @@ def _resolve_intervals_1dplot( remove_drawstyle = False # Convert intervals to double points - x_is_interval = _valid_other_type(xval, [pd.Interval]) - y_is_interval = _valid_other_type(yval, [pd.Interval]) + x_is_interval = _valid_other_type(xval, pd.Interval) + y_is_interval = _valid_other_type(yval, pd.Interval) if x_is_interval and y_is_interval: raise TypeError("Can't step plot intervals against intervals.") elif x_is_interval: @@ -628,10 +628,10 @@ def _resolve_intervals_1dplot( else: # Convert intervals to mid points and adjust labels - if _valid_other_type(xval, [pd.Interval]): + if _valid_other_type(xval, pd.Interval): xval = _interval_to_mid_points(xval) x_suffix = "_center" - if _valid_other_type(yval, [pd.Interval]): + if _valid_other_type(yval, pd.Interval): yval = _interval_to_mid_points(yval) y_suffix = "_center" @@ -646,7 +646,7 @@ def _resolve_intervals_2dplot(val, func_name): increases length by 1. 
""" label_extra = "" - if _valid_other_type(val, [pd.Interval]): + if _valid_other_type(val, pd.Interval): if func_name == "pcolormesh": val = _interval_to_bound_points(val) else: @@ -656,11 +656,13 @@ def _resolve_intervals_2dplot(val, func_name): return val, label_extra -def _valid_other_type(x, types): +def _valid_other_type( + x: ArrayLike, types: type[object] | tuple[type[object], ...] +) -> bool: """ Do all elements of x have a type from types? """ - return all(any(isinstance(el, t) for t in types) for el in np.ravel(x)) + return all(isinstance(el, types) for el in np.ravel(x)) def _valid_numpy_subdtype(x, numpy_types): @@ -675,47 +677,49 @@ def _valid_numpy_subdtype(x, numpy_types): return any(np.issubdtype(x.dtype, t) for t in numpy_types) -def _ensure_plottable(*args): +def _ensure_plottable(*args) -> None: """ Raise exception if there is anything in args that can't be plotted on an axis by matplotlib. """ - numpy_types = [ + numpy_types: tuple[type[object], ...] = ( np.floating, np.integer, np.timedelta64, np.datetime64, np.bool_, np.str_, - ] - other_types = [datetime] - if cftime is not None: - cftime_datetime_types = [cftime.datetime] - other_types = other_types + cftime_datetime_types - else: - cftime_datetime_types = [] + ) + other_types: tuple[type[object], ...] = (datetime,) + cftime_datetime_types: tuple[type[object], ...] = ( + () if cftime is None else (cftime.datetime,) + ) + other_types += cftime_datetime_types + for x in args: if not ( - _valid_numpy_subdtype(np.array(x), numpy_types) - or _valid_other_type(np.array(x), other_types) + _valid_numpy_subdtype(np.asarray(x), numpy_types) + or _valid_other_type(np.asarray(x), other_types) ): raise TypeError( "Plotting requires coordinates to be numeric, boolean, " "or dates of type numpy.datetime64, " "datetime.datetime, cftime.datetime or " - f"pandas.Interval. Received data of type {np.array(x).dtype} instead." - ) - if ( - _valid_other_type(np.array(x), cftime_datetime_types) - and not nc_time_axis_available - ): - raise ImportError( - "Plotting of arrays of cftime.datetime " - "objects or arrays indexed by " - "cftime.datetime objects requires the " - "optional `nc-time-axis` (v1.2.0 or later) " - "package." + f"pandas.Interval. Received data of type {np.asarray(x).dtype} instead." ) + if _valid_other_type(np.asarray(x), cftime_datetime_types): + if nc_time_axis_available: + # Register cftime datetypes to matplotlib.units.registry, + # otherwise matplotlib will raise an error: + import nc_time_axis # noqa: F401 + else: + raise ImportError( + "Plotting of arrays of cftime.datetime " + "objects or arrays indexed by " + "cftime.datetime objects requires the " + "optional `nc-time-axis` (v1.2.0 or later) " + "package." + ) def _is_numeric(arr): @@ -1337,7 +1341,7 @@ def _parse_size( else: levels = numbers = np.sort(np.unique(flatdata)) - min_width, max_width = _MARKERSIZE_RANGE + min_width, default_width, max_width = _MARKERSIZE_RANGE # width_range = min_width, max_width if norm is None: @@ -1374,8 +1378,8 @@ class _Normalize(Sequence): ---------- data : DataArray DataArray to normalize. - width : Sequence of two numbers, optional - Normalize the data to theses min and max values. + width : Sequence of three numbers, optional + Normalize the data to these (min, default, max) values. The default is None. 
""" @@ -1384,7 +1388,7 @@ class _Normalize(Sequence): _data_unique_index: np.ndarray _data_unique_inverse: np.ndarray _data_is_numeric: bool - _width: tuple[float, float] | None + _width: tuple[float, float, float] | None __slots__ = ( "_data", @@ -1398,7 +1402,7 @@ class _Normalize(Sequence): def __init__( self, data: DataArray | None, - width: tuple[float, float] | None = None, + width: tuple[float, float, float] | None = None, _is_facetgrid: bool = False, ) -> None: self._data = data @@ -1459,14 +1463,22 @@ def _calc_widths(self, y: DataArray) -> DataArray: ... def _calc_widths(self, y: np.ndarray | DataArray) -> np.ndarray | DataArray: + """ + Normalize the values so they're inbetween self._width. + """ if self._width is None: return y - x0, x1 = self._width - - k = (y - np.min(y)) / (np.max(y) - np.min(y)) - widths = x0 + k * (x1 - x0) + xmin, xdefault, xmax = self._width + diff_maxy_miny = np.max(y) - np.min(y) + if diff_maxy_miny == 0: + # Use default with if y is constant: + widths = xdefault + 0 * y + else: + # Normalize inbetween xmin and xmax: + k = (y - np.min(y)) / diff_maxy_miny + widths = xmin + k * (xmax - xmin) return widths @overload @@ -1497,7 +1509,7 @@ def values(self) -> DataArray | None: array([3, 1, 1, 3, 5]) Dimensions without coordinates: dim_0 - >>> _Normalize(a, width=[18, 72]).values + >>> _Normalize(a, width=(18, 36, 72)).values array([45., 18., 18., 45., 72.]) Dimensions without coordinates: dim_0 @@ -1508,10 +1520,16 @@ def values(self) -> DataArray | None: array([0.5, 0. , 0. , 0.5, 2. , 3. ]) Dimensions without coordinates: dim_0 - >>> _Normalize(a, width=[18, 72]).values + >>> _Normalize(a, width=(18, 36, 72)).values array([27., 18., 18., 27., 54., 72.]) Dimensions without coordinates: dim_0 + + >>> _Normalize(a * 0, width=(18, 36, 72)).values + + array([36., 36., 36., 36., 36., 36.]) + Dimensions without coordinates: dim_0 + """ if self.data is None: return None @@ -1536,14 +1554,14 @@ def _values_unique(self) -> np.ndarray | None: >>> _Normalize(a)._values_unique array([1, 3, 5]) - >>> _Normalize(a, width=[18, 72])._values_unique + >>> _Normalize(a, width=(18, 36, 72))._values_unique array([18., 45., 72.]) >>> a = xr.DataArray([0.5, 0, 0, 0.5, 2, 3]) >>> _Normalize(a)._values_unique array([0. , 0.5, 2. , 3. 
]) - >>> _Normalize(a, width=[18, 72])._values_unique + >>> _Normalize(a, width=(18, 36, 72))._values_unique array([18., 27., 54., 72.]) """ if self.data is None: @@ -1615,7 +1633,7 @@ def format(self) -> FuncFormatter: Examples -------- >>> a = xr.DataArray([0.5, 0, 0, 0.5, 2, 3]) - >>> aa = _Normalize(a, width=[0, 1]) + >>> aa = _Normalize(a, width=(0, 0.5, 1)) >>> aa._lookup 0.000000 0.0 0.166667 0.5 @@ -1641,7 +1659,7 @@ def func(self) -> Callable[[Any, None | Any], Any]: Examples -------- >>> a = xr.DataArray([0.5, 0, 0, 0.5, 2, 3]) - >>> aa = _Normalize(a, width=[0, 1]) + >>> aa = _Normalize(a, width=(0, 0.5, 1)) >>> aa._lookup 0.000000 0.0 0.166667 0.5 @@ -1663,7 +1681,7 @@ def _determine_guide( sizeplt_norm: _Normalize, add_colorbar: None | bool = None, add_legend: None | bool = None, - plotfunc_name: str = None, + plotfunc_name: str | None = None, ) -> tuple[bool, bool]: if plotfunc_name == "hist": return False, False diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 584053d0213..6970a34b63d 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -35,6 +35,8 @@ except ImportError: pass +# https://github.com/pydata/xarray/issues/7322 +warnings.filterwarnings("ignore", "'urllib3.contrib.pyopenssl' module is deprecated") arm_xfail = pytest.mark.xfail( platform.machine() == "aarch64" or "arm" in platform.machine(), @@ -68,7 +70,6 @@ def _importorskip( has_cftime, requires_cftime = _importorskip("cftime") has_dask, requires_dask = _importorskip("dask") has_bottleneck, requires_bottleneck = _importorskip("bottleneck") -has_nc_time_axis, requires_nc_time_axis = _importorskip("nc_time_axis") has_rasterio, requires_rasterio = _importorskip("rasterio") has_zarr, requires_zarr = _importorskip("zarr") has_fsspec, requires_fsspec = _importorskip("fsspec") diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 64030b3f595..81417f8a06a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -102,6 +102,24 @@ except ImportError: pass +have_zarr_kvstore = False +try: + from zarr.storage import KVStore + + have_zarr_kvstore = True +except ImportError: + KVStore = None + +have_zarr_v3 = False +try: + # as of Zarr v2.13 these imports require environment variable + # ZARR_V3_EXPERIMENTAL_API=1 + from zarr import DirectoryStoreV3, KVStoreV3 + + have_zarr_v3 = True +except ImportError: + KVStoreV3 = None + ON_WINDOWS = sys.platform == "win32" default_value = object() dask_array_type = array_type("dask") @@ -1735,6 +1753,8 @@ def test_write_inconsistent_chunks(self) -> None: class ZarrBase(CFEncodedBase): DIMENSION_KEY = "_ARRAY_DIMENSIONS" + zarr_version = 2 + version_kwargs: dict[str, Any] = {} def create_zarr_target(self): raise NotImplementedError @@ -1742,14 +1762,18 @@ def create_zarr_target(self): @contextlib.contextmanager def create_store(self): with self.create_zarr_target() as store_target: - yield backends.ZarrStore.open_group(store_target, mode="w") + yield backends.ZarrStore.open_group( + store_target, mode="w", **self.version_kwargs + ) def save(self, dataset, store_target, **kwargs): - return dataset.to_zarr(store=store_target, **kwargs) + return dataset.to_zarr(store=store_target, **kwargs, **self.version_kwargs) @contextlib.contextmanager def open(self, store_target, **kwargs): - with xr.open_dataset(store_target, engine="zarr", **kwargs) as ds: + with xr.open_dataset( + store_target, engine="zarr", **kwargs, **self.version_kwargs + ) as ds: yield ds @contextlib.contextmanager @@ -1767,24 +1791,30 
@@ def roundtrip( @pytest.mark.parametrize("consolidated", [False, True, None]) def test_roundtrip_consolidated(self, consolidated) -> None: + if consolidated and self.zarr_version > 2: + pytest.xfail("consolidated metadata is not supported for zarr v3 yet") expected = create_test_data() with self.roundtrip( expected, - save_kwargs={"consolidated": True}, - open_kwargs={"backend_kwargs": {"consolidated": True}}, + save_kwargs={"consolidated": consolidated}, + open_kwargs={"backend_kwargs": {"consolidated": consolidated}}, ) as actual: self.check_dtypes_roundtripped(expected, actual) assert_identical(expected, actual) def test_read_non_consolidated_warning(self) -> None: + + if self.zarr_version > 2: + pytest.xfail("consolidated metadata is not supported for zarr v3 yet") + expected = create_test_data() with self.create_zarr_target() as store: - expected.to_zarr(store, consolidated=False) + expected.to_zarr(store, consolidated=False, **self.version_kwargs) with pytest.warns( RuntimeWarning, match="Failed to open Zarr store with consolidated", ): - with xr.open_zarr(store) as ds: + with xr.open_zarr(store, **self.version_kwargs) as ds: assert_identical(ds, expected) def test_non_existent_store(self) -> None: @@ -2076,10 +2106,14 @@ def test_write_persistence_modes(self, group) -> None: # check append mode for append write ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w", group=group) - ds_to_append.to_zarr(store_target, append_dim="time", group=group) + ds.to_zarr(store_target, mode="w", group=group, **self.version_kwargs) + ds_to_append.to_zarr( + store_target, append_dim="time", group=group, **self.version_kwargs + ) original = xr.concat([ds, ds_to_append], dim="time") - actual = xr.open_dataset(store_target, group=group, engine="zarr") + actual = xr.open_dataset( + store_target, group=group, engine="zarr", **self.version_kwargs + ) assert_identical(original, actual) def test_compressor_encoding(self) -> None: @@ -2119,8 +2153,8 @@ def test_append_with_mode_rplus_success(self) -> None: original = Dataset({"foo": ("x", [1])}) modified = Dataset({"foo": ("x", [2])}) with self.create_zarr_target() as store: - original.to_zarr(store) - modified.to_zarr(store, mode="r+") + original.to_zarr(store, **self.version_kwargs) + modified.to_zarr(store, mode="r+", **self.version_kwargs) with self.open(store) as actual: assert_identical(actual, modified) @@ -2128,61 +2162,69 @@ def test_append_with_mode_rplus_fails(self) -> None: original = Dataset({"foo": ("x", [1])}) modified = Dataset({"bar": ("x", [2])}) with self.create_zarr_target() as store: - original.to_zarr(store) + original.to_zarr(store, **self.version_kwargs) with pytest.raises( ValueError, match="dataset contains non-pre-existing variables" ): - modified.to_zarr(store, mode="r+") + modified.to_zarr(store, mode="r+", **self.version_kwargs) def test_append_with_invalid_dim_raises(self) -> None: ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + ds.to_zarr(store_target, mode="w", **self.version_kwargs) with pytest.raises( ValueError, match="does not match any existing dataset dimensions" ): - ds_to_append.to_zarr(store_target, append_dim="notvalid") + ds_to_append.to_zarr( + store_target, append_dim="notvalid", **self.version_kwargs + ) def test_append_with_no_dims_raises(self) -> None: with self.create_zarr_target() as store_target: - Dataset({"foo": ("x", 
[1])}).to_zarr(store_target, mode="w") + Dataset({"foo": ("x", [1])}).to_zarr( + store_target, mode="w", **self.version_kwargs + ) with pytest.raises(ValueError, match="different dimension names"): - Dataset({"foo": ("y", [2])}).to_zarr(store_target, mode="a") + Dataset({"foo": ("y", [2])}).to_zarr( + store_target, mode="a", **self.version_kwargs + ) def test_append_with_append_dim_not_set_raises(self) -> None: ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + ds.to_zarr(store_target, mode="w", **self.version_kwargs) with pytest.raises(ValueError, match="different dimension sizes"): - ds_to_append.to_zarr(store_target, mode="a") + ds_to_append.to_zarr(store_target, mode="a", **self.version_kwargs) def test_append_with_mode_not_a_raises(self) -> None: ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + ds.to_zarr(store_target, mode="w", **self.version_kwargs) with pytest.raises(ValueError, match="cannot set append_dim unless"): - ds_to_append.to_zarr(store_target, mode="w", append_dim="time") + ds_to_append.to_zarr( + store_target, mode="w", append_dim="time", **self.version_kwargs + ) def test_append_with_existing_encoding_raises(self) -> None: ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + ds.to_zarr(store_target, mode="w", **self.version_kwargs) with pytest.raises(ValueError, match="but encoding was provided"): ds_to_append.to_zarr( store_target, append_dim="time", encoding={"da": {"compressor": None}}, + **self.version_kwargs, ) @pytest.mark.parametrize("dtype", ["U", "S"]) def test_append_string_length_mismatch_raises(self, dtype) -> None: ds, ds_to_append = create_append_string_length_mismatch_test_data(dtype) with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + ds.to_zarr(store_target, mode="w", **self.version_kwargs) with pytest.raises(ValueError, match="Mismatched dtypes for variable"): ds_to_append.to_zarr( - store_target, - append_dim="time", + store_target, append_dim="time", **self.version_kwargs ) def test_check_encoding_is_consistent_after_append(self) -> None: @@ -2195,13 +2237,17 @@ def test_check_encoding_is_consistent_after_append(self) -> None: compressor = zarr.Blosc() encoding = {"da": {"compressor": compressor}} - ds.to_zarr(store_target, mode="w", encoding=encoding) - ds_to_append.to_zarr(store_target, append_dim="time") - actual_ds = xr.open_dataset(store_target, engine="zarr") + ds.to_zarr(store_target, mode="w", encoding=encoding, **self.version_kwargs) + ds_to_append.to_zarr(store_target, append_dim="time", **self.version_kwargs) + actual_ds = xr.open_dataset( + store_target, engine="zarr", **self.version_kwargs + ) actual_encoding = actual_ds["da"].encoding["compressor"] assert actual_encoding.get_config() == compressor.get_config() assert_identical( - xr.open_dataset(store_target, engine="zarr").compute(), + xr.open_dataset( + store_target, engine="zarr", **self.version_kwargs + ).compute(), xr.concat([ds, ds_to_append], dim="time"), ) @@ -2211,11 +2257,16 @@ def test_append_with_new_variable(self) -> None: # check append mode for new variable with self.create_zarr_target() as store_target: - xr.concat([ds, ds_to_append], dim="time").to_zarr(store_target, mode="w") - ds_with_new_var.to_zarr(store_target, mode="a") + xr.concat([ds, ds_to_append], dim="time").to_zarr( + 
store_target, mode="w", **self.version_kwargs + ) + ds_with_new_var.to_zarr(store_target, mode="a", **self.version_kwargs) combined = xr.concat([ds, ds_to_append], dim="time") combined["new_var"] = ds_with_new_var["new_var"] - assert_identical(combined, xr.open_dataset(store_target, engine="zarr")) + assert_identical( + combined, + xr.open_dataset(store_target, engine="zarr", **self.version_kwargs), + ) @requires_dask def test_to_zarr_compute_false_roundtrip(self) -> None: @@ -2291,12 +2342,14 @@ def test_no_warning_from_open_emptydim_with_chunks(self) -> None: with self.roundtrip(ds, open_kwargs=dict(chunks={"a": 1})) as ds_reload: assert_identical(ds, ds_reload) - @pytest.mark.parametrize("consolidated", [False, True]) + @pytest.mark.parametrize("consolidated", [False, True, None]) @pytest.mark.parametrize("compute", [False, True]) @pytest.mark.parametrize("use_dask", [False, True]) def test_write_region(self, consolidated, compute, use_dask) -> None: if (use_dask or not compute) and not has_dask: pytest.skip("requires dask") + if consolidated and self.zarr_version > 2: + pytest.xfail("consolidated metadata is not supported for zarr v3 yet") zeros = Dataset({"u": (("x",), np.zeros(10))}) nonzeros = Dataset({"u": (("x",), np.arange(1, 11))}) @@ -2311,16 +2364,24 @@ def test_write_region(self, consolidated, compute, use_dask) -> None: consolidated=consolidated, compute=compute, encoding={"u": dict(chunks=2)}, + **self.version_kwargs, ) if compute: - with xr.open_zarr(store, consolidated=consolidated) as actual: + with xr.open_zarr( + store, consolidated=consolidated, **self.version_kwargs + ) as actual: assert_identical(actual, zeros) for i in range(0, 10, 2): region = {"x": slice(i, i + 2)} nonzeros.isel(region).to_zarr( - store, region=region, consolidated=consolidated + store, + region=region, + consolidated=consolidated, + **self.version_kwargs, ) - with xr.open_zarr(store, consolidated=consolidated) as actual: + with xr.open_zarr( + store, consolidated=consolidated, **self.version_kwargs + ) as actual: assert_identical(actual, nonzeros) @pytest.mark.parametrize("mode", [None, "r+", "a"]) @@ -2328,10 +2389,12 @@ def test_write_region_mode(self, mode) -> None: zeros = Dataset({"u": (("x",), np.zeros(10))}) nonzeros = Dataset({"u": (("x",), np.arange(1, 11))}) with self.create_zarr_target() as store: - zeros.to_zarr(store) + zeros.to_zarr(store, **self.version_kwargs) for region in [{"x": slice(5)}, {"x": slice(5, 10)}]: - nonzeros.isel(region).to_zarr(store, region=region, mode=mode) - with xr.open_zarr(store) as actual: + nonzeros.isel(region).to_zarr( + store, region=region, mode=mode, **self.version_kwargs + ) + with xr.open_zarr(store, **self.version_kwargs) as actual: assert_identical(actual, nonzeros) @requires_dask @@ -2355,8 +2418,8 @@ def test_write_preexisting_override_metadata(self) -> None: ) with self.create_zarr_target() as store: - original.to_zarr(store, compute=False) - both_modified.to_zarr(store, mode="a") + original.to_zarr(store, compute=False, **self.version_kwargs) + both_modified.to_zarr(store, mode="a", **self.version_kwargs) with self.open(store) as actual: # NOTE: this arguably incorrect -- we should probably be # overriding the variable metadata, too. 
See the TODO note in @@ -2364,15 +2427,17 @@ def test_write_preexisting_override_metadata(self) -> None: assert_identical(actual, global_modified) with self.create_zarr_target() as store: - original.to_zarr(store, compute=False) - both_modified.to_zarr(store, mode="r+") + original.to_zarr(store, compute=False, **self.version_kwargs) + both_modified.to_zarr(store, mode="r+", **self.version_kwargs) with self.open(store) as actual: assert_identical(actual, only_new_data) with self.create_zarr_target() as store: - original.to_zarr(store, compute=False) + original.to_zarr(store, compute=False, **self.version_kwargs) # with region, the default mode becomes r+ - both_modified.to_zarr(store, region={"x": slice(None)}) + both_modified.to_zarr( + store, region={"x": slice(None)}, **self.version_kwargs + ) with self.open(store) as actual: assert_identical(actual, only_new_data) @@ -2383,7 +2448,7 @@ def test_write_region_errors(self) -> None: @contextlib.contextmanager def setup_and_verify_store(expected=data): with self.create_zarr_target() as store: - data.to_zarr(store) + data.to_zarr(store, **self.version_kwargs) yield store with self.open(store) as actual: assert_identical(actual, expected) @@ -2391,7 +2456,7 @@ def setup_and_verify_store(expected=data): # verify the base case works expected = Dataset({"u": (("x",), np.array([10, 11, 2, 3, 4]))}) with setup_and_verify_store(expected) as store: - data2.to_zarr(store, region={"x": slice(2)}) + data2.to_zarr(store, region={"x": slice(2)}, **self.version_kwargs) with setup_and_verify_store() as store: with pytest.raises( @@ -2400,46 +2465,57 @@ def setup_and_verify_store(expected=data): "cannot set region unless mode='a', mode='r+' or mode=None" ), ): - data.to_zarr(store, region={"x": slice(None)}, mode="w") + data.to_zarr( + store, region={"x": slice(None)}, mode="w", **self.version_kwargs + ) with setup_and_verify_store() as store: with pytest.raises(TypeError, match=r"must be a dict"): - data.to_zarr(store, region=slice(None)) # type: ignore[call-overload] + data.to_zarr(store, region=slice(None), **self.version_kwargs) # type: ignore[call-overload] with setup_and_verify_store() as store: with pytest.raises(TypeError, match=r"must be slice objects"): - data2.to_zarr(store, region={"x": [0, 1]}) # type: ignore[dict-item] + data2.to_zarr(store, region={"x": [0, 1]}, **self.version_kwargs) # type: ignore[dict-item] with setup_and_verify_store() as store: with pytest.raises(ValueError, match=r"step on all slices"): - data2.to_zarr(store, region={"x": slice(None, None, 2)}) + data2.to_zarr( + store, region={"x": slice(None, None, 2)}, **self.version_kwargs + ) with setup_and_verify_store() as store: with pytest.raises( ValueError, match=r"all keys in ``region`` are not in Dataset dimensions", ): - data.to_zarr(store, region={"y": slice(None)}) + data.to_zarr(store, region={"y": slice(None)}, **self.version_kwargs) with setup_and_verify_store() as store: with pytest.raises( ValueError, match=r"all variables in the dataset to write must have at least one dimension in common", ): - data2.assign(v=2).to_zarr(store, region={"x": slice(2)}) + data2.assign(v=2).to_zarr( + store, region={"x": slice(2)}, **self.version_kwargs + ) with setup_and_verify_store() as store: with pytest.raises( ValueError, match=r"cannot list the same dimension in both" ): - data.to_zarr(store, region={"x": slice(None)}, append_dim="x") + data.to_zarr( + store, + region={"x": slice(None)}, + append_dim="x", + **self.version_kwargs, + ) with setup_and_verify_store() as store: with 
pytest.raises( ValueError, match=r"variable 'u' already exists with different dimension sizes", ): - data2.to_zarr(store, region={"x": slice(3)}) + data2.to_zarr(store, region={"x": slice(3)}, **self.version_kwargs) @requires_dask def test_encoding_chunksizes(self) -> None: @@ -2481,10 +2557,10 @@ def test_chunk_encoding_with_larger_dask_chunks(self) -> None: def test_open_zarr_use_cftime(self) -> None: ds = create_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target) - ds_a = xr.open_zarr(store_target) + ds.to_zarr(store_target, **self.version_kwargs) + ds_a = xr.open_zarr(store_target, **self.version_kwargs) assert_identical(ds, ds_a) - ds_b = xr.open_zarr(store_target, use_cftime=True) + ds_b = xr.open_zarr(store_target, use_cftime=True, **self.version_kwargs) assert xr.coding.times.contains_cftime_datetimes(ds_b.time) def test_write_read_select_write(self) -> None: @@ -2493,13 +2569,13 @@ def test_write_read_select_write(self) -> None: # NOTE: using self.roundtrip, which uses open_dataset, will not trigger the bug. with self.create_zarr_target() as initial_store: - ds.to_zarr(initial_store, mode="w") - ds1 = xr.open_zarr(initial_store) + ds.to_zarr(initial_store, mode="w", **self.version_kwargs) + ds1 = xr.open_zarr(initial_store, **self.version_kwargs) # Combination of where+squeeze triggers error on write. ds_sel = ds1.where(ds1.coords["dim3"] == "a", drop=True).squeeze("dim3") with self.create_zarr_target() as final_store: - ds_sel.to_zarr(final_store, mode="w") + ds_sel.to_zarr(final_store, mode="w", **self.version_kwargs) @pytest.mark.parametrize("obj", [Dataset(), DataArray(name="foo")]) def test_attributes(self, obj) -> None: @@ -2508,21 +2584,24 @@ def test_attributes(self, obj) -> None: obj.attrs["good"] = {"key": "value"} ds = obj if isinstance(obj, Dataset) else obj.to_dataset() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target) - assert_identical(ds, xr.open_zarr(store_target)) + ds.to_zarr(store_target, **self.version_kwargs) + assert_identical(ds, xr.open_zarr(store_target, **self.version_kwargs)) obj.attrs["bad"] = DataArray() ds = obj if isinstance(obj, Dataset) else obj.to_dataset() with self.create_zarr_target() as store_target: with pytest.raises(TypeError, match=r"Invalid attribute in Dataset.attrs."): - ds.to_zarr(store_target) + ds.to_zarr(store_target, **self.version_kwargs) @requires_zarr class TestZarrDictStore(ZarrBase): @contextlib.contextmanager def create_zarr_target(self): - yield {} + if have_zarr_kvstore: + yield KVStore({}) + else: + yield {} @requires_zarr @@ -2532,6 +2611,55 @@ def create_zarr_target(self): with create_tmp_file(suffix=".zarr") as tmp: yield tmp + @contextlib.contextmanager + def create_store(self): + with self.create_zarr_target() as store_target: + group = backends.ZarrStore.open_group(store_target, mode="w") + # older Zarr versions do not have the _store_version attribute + if have_zarr_v3: + # verify that a v2 store was created + assert group.zarr_group.store._store_version == 2 + yield group + + +class ZarrBaseV3(ZarrBase): + zarr_version = 3 + + def test_roundtrip_coordinates_with_space(self): + original = Dataset(coords={"x": 0, "y z": 1}) + with pytest.warns(SerializationWarning): + # v3 stores do not allow spaces in the key name + with pytest.raises(ValueError): + with self.roundtrip(original): + pass + + +@pytest.mark.skipif(not have_zarr_v3, reason="requires zarr version 3") +class TestZarrKVStoreV3(ZarrBaseV3): + @contextlib.contextmanager + def 
create_zarr_target(self): + yield KVStoreV3({}) + + +@pytest.mark.skipif(not have_zarr_v3, reason="requires zarr version 3") +class TestZarrDirectoryStoreV3(ZarrBaseV3): + @contextlib.contextmanager + def create_zarr_target(self): + with create_tmp_file(suffix=".zr3") as tmp: + yield DirectoryStoreV3(tmp) + + +@pytest.mark.skipif(not have_zarr_v3, reason="requires zarr version 3") +class TestZarrDirectoryStoreV3FromPath(TestZarrDirectoryStoreV3): + # Must specify zarr_version=3 to get a v3 store because create_zarr_target + # is a string path. + version_kwargs = {"zarr_version": 3} + + @contextlib.contextmanager + def create_zarr_target(self): + with create_tmp_file(suffix=".zr3") as tmp: + yield tmp + @requires_zarr @requires_fsspec @@ -3025,19 +3153,18 @@ def test_open_badbytes(self) -> None: with pytest.raises( ValueError, match=r"not the signature of a valid netCDF4 file" ): - with open_dataset(BytesIO(b"garbage"), engine="h5netcdf"): # type: ignore[arg-type] + with open_dataset(BytesIO(b"garbage"), engine="h5netcdf"): pass def test_open_twice(self) -> None: expected = create_test_data() expected.attrs["foo"] = "bar" - with pytest.raises(ValueError, match=r"read/write pointer not at the start"): - with create_tmp_file() as tmp_file: - expected.to_netcdf(tmp_file, engine="h5netcdf") - with open(tmp_file, "rb") as f: + with create_tmp_file() as tmp_file: + expected.to_netcdf(tmp_file, engine="h5netcdf") + with open(tmp_file, "rb") as f: + with open_dataset(f, engine="h5netcdf"): with open_dataset(f, engine="h5netcdf"): - with open_dataset(f, engine="h5netcdf"): - pass + pass @requires_scipy def test_open_fileobj(self) -> None: @@ -3069,15 +3196,7 @@ def test_open_fileobj(self) -> None: # `raises_regex`?). Ref https://github.com/pydata/xarray/pull/5191 with open(tmp_file, "rb") as f: f.seek(8) - with pytest.raises( - ValueError, - match="match in any of xarray's currently installed IO", - ): - with pytest.warns( - RuntimeWarning, - match=re.escape("'h5netcdf' fails while guessing"), - ): - open_dataset(f) + open_dataset(f) @requires_h5netcdf @@ -4949,6 +5068,12 @@ def test_extract_nc4_variable_encoding(self) -> None: encoding = _extract_nc4_variable_encoding(var, unlimited_dims=("x",)) assert {} == encoding + @requires_netCDF4 + def test_extract_nc4_variable_encoding_netcdf4(self, monkeypatch): + # New netCDF4 1.6.0 compression argument. 
+ var = xr.Variable(("x",), [1, 2, 3], {}, {"compression": "szlib"}) + _extract_nc4_variable_encoding(var, backend="netCDF4", raise_on_invalid=True) + def test_extract_h5nc_encoding(self) -> None: # not supported with h5netcdf (yet) var = xr.Variable(("x",), [1, 2, 3], {}, {"least_sigificant_digit": 2}) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 075393e84e7..d28f4594559 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1385,3 +1385,9 @@ def test_date_range_like_errors(): match="'source' must be a 1D array of datetime objects for inferring its range.", ): date_range_like(da, "noleap") + + +def as_timedelta_not_implemented_error(): + tick = Tick() + with pytest.raises(NotImplementedError): + tick.as_timedelta() diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index 35447a39f3c..e780421e09e 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -9,7 +9,7 @@ import xarray as xr from xarray.core.resample_cftime import CFTimeGrouper -pytest.importorskip("cftime") +cftime = pytest.importorskip("cftime") # Create a list of pairs of similar-length initial and resample frequencies @@ -50,7 +50,63 @@ ] -def da(index): +def compare_against_pandas( + da_datetimeindex, + da_cftimeindex, + freq, + closed=None, + label=None, + base=None, + offset=None, + origin=None, + loffset=None, +) -> None: + if isinstance(origin, tuple): + origin_pandas = pd.Timestamp(datetime.datetime(*origin)) + origin_cftime = cftime.DatetimeGregorian(*origin) + else: + origin_pandas = origin + origin_cftime = origin + + try: + result_datetimeindex = da_datetimeindex.resample( + time=freq, + closed=closed, + label=label, + base=base, + loffset=loffset, + offset=offset, + origin=origin_pandas, + ).mean() + except ValueError: + with pytest.raises(ValueError): + da_cftimeindex.resample( + time=freq, + closed=closed, + label=label, + base=base, + loffset=loffset, + origin=origin_cftime, + offset=offset, + ).mean() + else: + result_cftimeindex = da_cftimeindex.resample( + time=freq, + closed=closed, + label=label, + base=base, + loffset=loffset, + origin=origin_cftime, + offset=offset, + ).mean() + # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass + result_cftimeindex["time"] = ( + result_cftimeindex.xindexes["time"].to_pandas_index().to_datetimeindex() + ) + xr.testing.assert_identical(result_cftimeindex, result_datetimeindex) + + +def da(index) -> xr.DataArray: return xr.DataArray( np.arange(100.0, 100.0 + index.size), coords=[index], dims=["time"] ) @@ -59,53 +115,31 @@ def da(index): @pytest.mark.parametrize("freqs", FREQS, ids=lambda x: "{}->{}".format(*x)) @pytest.mark.parametrize("closed", [None, "left", "right"]) @pytest.mark.parametrize("label", [None, "left", "right"]) -@pytest.mark.parametrize("base", [24, 31]) -def test_resample(freqs, closed, label, base) -> None: +@pytest.mark.parametrize( + ("base", "offset"), [(24, None), (31, None), (None, "5S")], ids=lambda x: f"{x}" +) +def test_resample(freqs, closed, label, base, offset) -> None: initial_freq, resample_freq = freqs start = "2000-01-01T12:07:01" + loffset = "12H" + origin = "start" index_kwargs = dict(start=start, periods=5, freq=initial_freq) datetime_index = pd.date_range(**index_kwargs) cftime_index = xr.cftime_range(**index_kwargs) + da_datetimeindex = da(datetime_index) + da_cftimeindex = da(cftime_index) - loffset = "12H" - try: - 
da_datetime = ( - da(datetime_index) - .resample( - time=resample_freq, - closed=closed, - label=label, - base=base, - loffset=loffset, - ) - .mean() - ) - except ValueError: - with pytest.raises(ValueError): - da(cftime_index).resample( - time=resample_freq, - closed=closed, - label=label, - base=base, - loffset=loffset, - ).mean() - else: - da_cftime = ( - da(cftime_index) - .resample( - time=resample_freq, - closed=closed, - label=label, - base=base, - loffset=loffset, - ) - .mean() - ) - # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass - da_cftime["time"] = ( - da_cftime.xindexes["time"].to_pandas_index().to_datetimeindex() - ) - xr.testing.assert_identical(da_cftime, da_datetime) + compare_against_pandas( + da_datetimeindex, + da_cftimeindex, + resample_freq, + closed=closed, + label=label, + base=base, + offset=offset, + origin=origin, + loffset=loffset, + ) @pytest.mark.parametrize( @@ -153,3 +187,54 @@ def test_calendars(calendar) -> None: # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass da_cftime["time"] = da_cftime.xindexes["time"].to_pandas_index().to_datetimeindex() xr.testing.assert_identical(da_cftime, da_datetime) + + +@pytest.mark.parametrize("closed", ["left", "right"]) +@pytest.mark.parametrize( + "origin", + ["start_day", "start", "end", "end_day", "epoch", (1970, 1, 1, 3, 2)], + ids=lambda x: f"{x}", +) +def test_origin(closed, origin) -> None: + initial_freq, resample_freq = ("3H", "9H") + start = "1969-12-31T12:07:01" + index_kwargs = dict(start=start, periods=12, freq=initial_freq) + datetime_index = pd.date_range(**index_kwargs) + cftime_index = xr.cftime_range(**index_kwargs) + da_datetimeindex = da(datetime_index) + da_cftimeindex = da(cftime_index) + + compare_against_pandas( + da_datetimeindex, + da_cftimeindex, + resample_freq, + closed=closed, + origin=origin, + ) + + +def test_base_and_offset_error(): + cftime_index = xr.cftime_range("2000", periods=5) + da_cftime = da(cftime_index) + with pytest.raises(ValueError, match="base and offset cannot"): + da_cftime.resample(time="2D", base=3, offset="5S") + + +@pytest.mark.parametrize("offset", ["foo", "5MS", 10]) +def test_invalid_offset_error(offset) -> None: + cftime_index = xr.cftime_range("2000", periods=5) + da_cftime = da(cftime_index) + with pytest.raises(ValueError, match="offset must be"): + da_cftime.resample(time="2D", offset=offset) + + +def test_timedelta_offset() -> None: + timedelta = datetime.timedelta(seconds=5) + string = "5S" + + cftime_index = xr.cftime_range("2000", periods=5) + da_cftime = da(cftime_index) + + timedelta_result = da_cftime.resample(time="2D", offset=timedelta).mean() + string_result = da_cftime.resample(time="2D", offset=string).mean() + xr.testing.assert_identical(timedelta_result, string_result) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index a73e5472893..ca3b93728aa 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -614,7 +614,7 @@ def test_cf_timedelta_2d() -> None: actual = coding.times.decode_cf_timedelta(numbers, units) assert_array_equal(expected, actual) - assert expected.dtype == actual.dtype # type: ignore + assert expected.dtype == actual.dtype @pytest.mark.parametrize( @@ -651,7 +651,7 @@ def test_format_cftime_datetime(date_args, expected) -> None: def test_decode_cf(calendar) -> None: days = [1.0, 2.0, 3.0] # TODO: GH5690 — do we want to allow this type for `coords`? 
- da = DataArray(days, coords=[days], dims=["time"], name="test") # type: ignore + da = DataArray(days, coords=[days], dims=["time"], name="test") ds = da.to_dataset() for v in ["test", "time"]: diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index da1bd014064..73889c362fe 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1925,16 +1925,63 @@ def test_where() -> None: def test_where_attrs() -> None: - cond = xr.DataArray([True, False], dims="x", attrs={"attr": "cond"}) - x = xr.DataArray([1, 1], dims="x", attrs={"attr": "x"}) - y = xr.DataArray([0, 0], dims="x", attrs={"attr": "y"}) + cond = xr.DataArray([True, False], coords={"a": [0, 1]}, attrs={"attr": "cond_da"}) + cond["a"].attrs = {"attr": "cond_coord"} + x = xr.DataArray([1, 1], coords={"a": [0, 1]}, attrs={"attr": "x_da"}) + x["a"].attrs = {"attr": "x_coord"} + y = xr.DataArray([0, 0], coords={"a": [0, 1]}, attrs={"attr": "y_da"}) + y["a"].attrs = {"attr": "y_coord"} + + # 3 DataArrays, takes attrs from x actual = xr.where(cond, x, y, keep_attrs=True) - expected = xr.DataArray([1, 0], dims="x", attrs={"attr": "x"}) + expected = xr.DataArray([1, 0], coords={"a": [0, 1]}, attrs={"attr": "x_da"}) + expected["a"].attrs = {"attr": "x_coord"} assert_identical(expected, actual) - # ensure keep_attrs can handle scalar values + # x as a scalar, takes no attrs + actual = xr.where(cond, 0, y, keep_attrs=True) + expected = xr.DataArray([0, 0], coords={"a": [0, 1]}) + assert_identical(expected, actual) + + # y as a scalar, takes attrs from x + actual = xr.where(cond, x, 0, keep_attrs=True) + expected = xr.DataArray([1, 0], coords={"a": [0, 1]}, attrs={"attr": "x_da"}) + expected["a"].attrs = {"attr": "x_coord"} + assert_identical(expected, actual) + + # x and y as a scalar, takes no attrs actual = xr.where(cond, 1, 0, keep_attrs=True) - assert actual.attrs == {} + expected = xr.DataArray([1, 0], coords={"a": [0, 1]}) + assert_identical(expected, actual) + + # cond and y as a scalar, takes attrs from x + actual = xr.where(True, x, y, keep_attrs=True) + expected = xr.DataArray([1, 1], coords={"a": [0, 1]}, attrs={"attr": "x_da"}) + expected["a"].attrs = {"attr": "x_coord"} + assert_identical(expected, actual) + + # DataArray and 2 Datasets, takes attrs from x + ds_x = xr.Dataset(data_vars={"x": x}, attrs={"attr": "x_ds"}) + ds_y = xr.Dataset(data_vars={"x": y}, attrs={"attr": "y_ds"}) + ds_actual = xr.where(cond, ds_x, ds_y, keep_attrs=True) + ds_expected = xr.Dataset( + data_vars={ + "x": xr.DataArray([1, 0], coords={"a": [0, 1]}, attrs={"attr": "x_da"}) + }, + attrs={"attr": "x_ds"}, + ) + ds_expected["a"].attrs = {"attr": "x_coord"} + assert_identical(ds_expected, ds_actual) + + # 2 DataArrays and 1 Dataset, takes attrs from x + ds_actual = xr.where(cond, x.rename("x"), ds_y, keep_attrs=True) + ds_expected = xr.Dataset( + data_vars={ + "x": xr.DataArray([1, 0], coords={"a": [0, 1]}, attrs={"attr": "x_da"}) + }, + ) + ds_expected["a"].attrs = {"attr": "x_coord"} + assert_identical(ds_expected, ds_actual) @pytest.mark.parametrize( diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index ac6049872b8..8184fe1955c 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -512,7 +512,7 @@ def test_equals_and_identical(self) -> None: def test_equals_failures(self) -> None: orig = DataArray(np.arange(5.0), {"a": 42}, dims="x") - assert not orig.equals(np.arange(5)) # type: ignore + assert not orig.equals(np.arange(5)) # type: 
ignore[arg-type] assert not orig.identical(123) # type: ignore assert not orig.broadcast_equals({1: 2}) # type: ignore @@ -2754,9 +2754,9 @@ def test_quantile_method(self, method) -> None: actual = DataArray(self.va).quantile(q, method=method) if Version(np.__version__) >= Version("1.22.0"): - expected = np.nanquantile(self.dv.values, np.array(q), method=method) # type: ignore[call-arg] + expected = np.nanquantile(self.dv.values, np.array(q), method=method) else: - expected = np.nanquantile(self.dv.values, np.array(q), interpolation=method) # type: ignore[call-arg] + expected = np.nanquantile(self.dv.values, np.array(q), interpolation=method) np.testing.assert_allclose(actual.values, expected) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 23ea705db71..8f3eb728f01 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6230,7 +6230,7 @@ def test_query(self, backend, engine, parser) -> None: with pytest.raises(ValueError): ds.query("a > 5") # type: ignore # must be dict or kwargs with pytest.raises(ValueError): - ds.query(x=(a > 5)) # type: ignore # must be query string + ds.query(x=(a > 5)) with pytest.raises(IndexError): ds.query(y="a > 5") # wrong length dimension with pytest.raises(IndexError): diff --git a/xarray/tests/test_distributed.py b/xarray/tests/test_distributed.py index 780417c488b..2dd589fd872 100644 --- a/xarray/tests/test_distributed.py +++ b/xarray/tests/test_distributed.py @@ -189,7 +189,7 @@ def test_dask_distributed_zarr_integration_test( write_kwargs: dict[str, Any] = {"consolidated": True} read_kwargs: dict[str, Any] = {"backend_kwargs": {"consolidated": True}} else: - write_kwargs = read_kwargs = {} # type: ignore + write_kwargs = read_kwargs = {} chunks = {"dim1": 4, "dim2": 3, "dim3": 5} with cluster() as (s, [a, b]): with Client(s["address"], loop=loop): diff --git a/xarray/tests/test_extensions.py b/xarray/tests/test_extensions.py index 6f91cdf661e..d4414b1f53a 100644 --- a/xarray/tests/test_extensions.py +++ b/xarray/tests/test_extensions.py @@ -37,7 +37,6 @@ def foo(self): da = xr.DataArray(0) assert da.demo.foo == "bar" - # accessor is cached assert ds.demo is ds.demo @@ -45,7 +44,7 @@ def foo(self): assert ds.demo.__doc__ == "Demo accessor." # TODO: typing doesn't seem to work with accessors assert xr.Dataset.demo.__doc__ == "Demo accessor." # type: ignore - assert isinstance(ds.demo, DemoAccessor) # type: ignore + assert isinstance(ds.demo, DemoAccessor) assert xr.Dataset.demo is DemoAccessor # type: ignore # ensure we can remove it diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 814111f3afd..293a758b629 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -236,13 +236,13 @@ def test_da_groupby_quantile() -> None: dims=("x", "y"), ) - actual_x = array.groupby("x").quantile(0, dim=...) # type: ignore[arg-type] # https://github.com/python/mypy/issues/7818 + actual_x = array.groupby("x").quantile(0, dim=...) expected_x = xr.DataArray( data=[1, 4], coords={"x": [1, 2], "quantile": 0}, dims="x" ) assert_identical(expected_x, actual_x) - actual_y = array.groupby("y").quantile(0, dim=...) # type: ignore[arg-type] # https://github.com/python/mypy/issues/7818 + actual_y = array.groupby("y").quantile(0, dim=...) expected_y = xr.DataArray( data=[1, 22], coords={"y": [0, 1], "quantile": 0}, dims="y" ) @@ -273,7 +273,7 @@ def test_da_groupby_quantile() -> None: ) g = foo.groupby(foo.time.dt.month) - actual = g.quantile(0, dim=...) 
# type: ignore[arg-type] # https://github.com/python/mypy/issues/7818 + actual = g.quantile(0, dim=...) expected = xr.DataArray( data=[ 0.0, @@ -357,11 +357,11 @@ def test_ds_groupby_quantile() -> None: coords={"x": [1, 1, 1, 2, 2], "y": [0, 0, 1]}, ) - actual_x = ds.groupby("x").quantile(0, dim=...) # type: ignore[arg-type] # https://github.com/python/mypy/issues/7818 + actual_x = ds.groupby("x").quantile(0, dim=...) expected_x = xr.Dataset({"a": ("x", [1, 4])}, coords={"x": [1, 2], "quantile": 0}) assert_identical(expected_x, actual_x) - actual_y = ds.groupby("y").quantile(0, dim=...) # type: ignore[arg-type] # https://github.com/python/mypy/issues/7818 + actual_y = ds.groupby("y").quantile(0, dim=...) expected_y = xr.Dataset({"a": ("y", [1, 22])}, coords={"y": [0, 1], "quantile": 0}) assert_identical(expected_y, actual_y) @@ -387,7 +387,7 @@ def test_ds_groupby_quantile() -> None: ) g = foo.groupby(foo.time.dt.month) - actual = g.quantile(0, dim=...) # type: ignore[arg-type] # https://github.com/python/mypy/issues/7818 + actual = g.quantile(0, dim=...) expected = xr.Dataset( { "a": ( @@ -1864,6 +1864,33 @@ def test_upsample_interpolate_dask(self, chunked_time): # done here due to floating point arithmetic assert_allclose(expected, actual, rtol=1e-16) + def test_resample_base(self) -> None: + times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10) + array = DataArray(np.arange(10), [("time", times)]) + + base = 11 + actual = array.resample(time="24H", base=base).mean() + expected = DataArray(array.to_series().resample("24H", base=base).mean()) + assert_identical(expected, actual) + + def test_resample_offset(self) -> None: + times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10) + array = DataArray(np.arange(10), [("time", times)]) + + offset = pd.Timedelta("11H") + actual = array.resample(time="24H", offset=offset).mean() + expected = DataArray(array.to_series().resample("24H", offset=offset).mean()) + assert_identical(expected, actual) + + def test_resample_origin(self) -> None: + times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10) + array = DataArray(np.arange(10), [("time", times)]) + + origin = "start" + actual = array.resample(time="24H", origin=origin).mean() + expected = DataArray(array.to_series().resample("24H", origin=origin).mean()) + assert_identical(expected, actual) + class TestDatasetResample: def test_resample_and_first(self): diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 01f616f92ba..d4a707b4e23 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -14,6 +14,7 @@ import xarray as xr import xarray.plot as xplt from xarray import DataArray, Dataset +from xarray.core.utils import module_available from xarray.plot.dataarray_plot import _infer_interval_breaks from xarray.plot.dataset_plot import _infer_meta_data from xarray.plot.utils import ( @@ -29,19 +30,20 @@ from . 
import ( assert_array_equal, assert_equal, - has_nc_time_axis, requires_cartopy, requires_cftime, requires_matplotlib, - requires_nc_time_axis, requires_seaborn, ) +# this should not be imported to test if the automatic lazy import works +has_nc_time_axis = module_available("nc_time_axis") + # import mpl and change the backend before other mpl imports try: import matplotlib as mpl import matplotlib.pyplot as plt - import mpl_toolkits # type: ignore + import mpl_toolkits except ImportError: pass @@ -2823,8 +2825,8 @@ def test_datetime_plot2d(self) -> None: @pytest.mark.filterwarnings("ignore:setting an array element with a sequence") -@requires_nc_time_axis @requires_cftime +@pytest.mark.skipif(not has_nc_time_axis, reason="nc_time_axis is not installed") class TestCFDatetimePlot(PlotTestCase): @pytest.fixture(autouse=True) def setUp(self) -> None: @@ -3206,7 +3208,7 @@ def test_plot_empty_raises(val: list | float, method: str) -> None: @requires_matplotlib -def test_facetgrid_axes_raises_deprecation_warning(): +def test_facetgrid_axes_raises_deprecation_warning() -> None: with pytest.warns( DeprecationWarning, match=( diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py index d44973e25e4..8029eb3f228 100644 --- a/xarray/tests/test_plugins.py +++ b/xarray/tests/test_plugins.py @@ -203,6 +203,7 @@ def test_lazy_import() -> None: "scipy", "zarr", "matplotlib", + "nc_time_axis", "flox", # "dask", # TODO: backends.locks is not lazy yet :( "dask.array", diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index c4b6294603f..a1a9a41d782 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Hashable +from typing import Hashable, Iterable, Sequence import numpy as np import pandas as pd @@ -253,6 +253,90 @@ def test_infix_dims_errors(supplied, all_): list(utils.infix_dims(supplied, all_)) +@pytest.mark.parametrize( + ["dim", "expected"], + [ + pytest.param("a", ("a",), id="str"), + pytest.param(["a", "b"], ("a", "b"), id="list_of_str"), + pytest.param(["a", 1], ("a", 1), id="list_mixed"), + pytest.param(("a", "b"), ("a", "b"), id="tuple_of_str"), + pytest.param(["a", ("b", "c")], ("a", ("b", "c")), id="list_with_tuple"), + pytest.param((("b", "c"),), (("b", "c"),), id="tuple_of_tuple"), + pytest.param(None, None, id="None"), + pytest.param(..., ..., id="ellipsis"), + ], +) +def test_parse_dims( + dim: str | Iterable[Hashable] | None, + expected: tuple[Hashable, ...], +) -> None: + all_dims = ("a", "b", 1, ("b", "c")) # selection of different Hashables + actual = utils.parse_dims(dim, all_dims, replace_none=False) + assert actual == expected + + +def test_parse_dims_set() -> None: + all_dims = ("a", "b", 1, ("b", "c")) # selection of different Hashables + dim = {"a", 1} + actual = utils.parse_dims(dim, all_dims) + assert set(actual) == dim + + +@pytest.mark.parametrize( + "dim", [pytest.param(None, id="None"), pytest.param(..., id="ellipsis")] +) +def test_parse_dims_replace_none(dim: None | ellipsis) -> None: + all_dims = ("a", "b", 1, ("b", "c")) # selection of different Hashables + actual = utils.parse_dims(dim, all_dims, replace_none=True) + assert actual == all_dims + + +@pytest.mark.parametrize( + "dim", + [ + pytest.param("x", id="str_missing"), + pytest.param(["a", "x"], id="list_missing_one"), + pytest.param(["x", 2], id="list_missing_all"), + ], +) +def test_parse_dims_raises(dim: str | Iterable[Hashable]) -> None: + all_dims = ("a", "b", 1, ("b", "c")) # selection 
of different Hashables + with pytest.raises(ValueError, match="'x'"): + utils.parse_dims(dim, all_dims, check_exists=True) + + +@pytest.mark.parametrize( + ["dim", "expected"], + [ + pytest.param("a", ("a",), id="str"), + pytest.param(["a", "b"], ("a", "b"), id="list"), + pytest.param([...], ("a", "b", "c"), id="list_only_ellipsis"), + pytest.param(["a", ...], ("a", "b", "c"), id="list_with_ellipsis"), + pytest.param(["a", ..., "b"], ("a", "c", "b"), id="list_with_middle_ellipsis"), + ], +) +def test_parse_ordered_dims( + dim: str | Sequence[Hashable | ellipsis], + expected: tuple[Hashable, ...], +) -> None: + all_dims = ("a", "b", "c") + actual = utils.parse_ordered_dims(dim, all_dims) + assert actual == expected + + +def test_parse_ordered_dims_raises() -> None: + all_dims = ("a", "b", "c") + + with pytest.raises(ValueError, match="'x' do not exist"): + utils.parse_ordered_dims("x", all_dims, check_exists=True) + + with pytest.raises(ValueError, match="repeated dims"): + utils.parse_ordered_dims(["a", ...], all_dims + ("a",)) + + with pytest.raises(ValueError, match="More than one ellipsis"): + utils.parse_ordered_dims(["a", ..., "b", ...], all_dims) + + @pytest.mark.parametrize( "nested_list, expected", [ @@ -266,3 +350,13 @@ def test_infix_dims_errors(supplied, all_): ) def test_iterate_nested(nested_list, expected): assert list(iterate_nested(nested_list)) == expected + + +def test_find_stack_level(): + assert utils.find_stack_level() == 1 + assert utils.find_stack_level(test_mode=True) == 2 + + def f(): + return utils.find_stack_level(test_mode=True) + + assert f() == 3 diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 983c584f69d..3a6c8f1b966 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1769,9 +1769,9 @@ def test_quantile_method(self, method, use_dask) -> None: actual = v.quantile(q, dim="y", method=method) if Version(np.__version__) >= Version("1.22"): - expected = np.nanquantile(self.d, q, axis=1, method=method) # type: ignore[call-arg] + expected = np.nanquantile(self.d, q, axis=1, method=method) else: - expected = np.nanquantile(self.d, q, axis=1, interpolation=method) # type: ignore[call-arg] + expected = np.nanquantile(self.d, q, axis=1, interpolation=method) if use_dask: assert isinstance(actual.data, dask_array_type) diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py index 58cd5b20b9c..b2a61ed5eb2 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -60,7 +60,7 @@ class {obj}{cls}Aggregations: def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -71,7 +71,7 @@ def reduce( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, **kwargs: Any, ) -> {obj}: raise NotImplementedError()""" @@ -84,7 +84,7 @@ class {obj}{cls}Aggregations: def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -95,7 +95,7 @@ def reduce( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, **kwargs: Any, ) -> {obj}: raise NotImplementedError()""" @@ -117,7 +117,7 @@ def {method}( TEMPLATE_REDUCTION_SIGNATURE_GROUPBY = ''' def {method}( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *,{extra_kwargs} keep_attrs: bool | None = None, **kwargs: Any, @@ -149,9 +149,9 @@ def {method}( ----- {notes}""" 
-_DIM_DOCSTRING = """dim : str, Iterable of Hashable, or None, default: None +_DIM_DOCSTRING = """dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``{method}``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions.""" + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.""" _DIM_DOCSTRING_GROUPBY = """dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``{method}``. For e.g. ``dim="x"`` diff --git a/xarray/util/generate_ops.py b/xarray/util/generate_ops.py index 7407e3d3f4f..0ca5954b056 100644 --- a/xarray/util/generate_ops.py +++ b/xarray/util/generate_ops.py @@ -110,7 +110,7 @@ def {method}(self: T_Dataset, other: DsCompatible) -> T_Dataset: ...{override}"" @overload{override} def {method}(self, other: T_Dataset) -> T_Dataset: ... @overload - def {method}(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def {method}(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def {method}(self: T_DataArray, other: DaCompatible) -> T_DataArray: ...""" stub_var = """\ @@ -124,7 +124,7 @@ def {method}(self: T_Variable, other: VarCompatible) -> T_Variable: ...""" @overload{override} def {method}(self, other: T_Dataset) -> T_Dataset: ... @overload - def {method}(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def {method}(self, other: "DataArray") -> "Dataset": ... @overload def {method}(self, other: GroupByIncompatible) -> NoReturn: ...""" stub_dagb = """\ diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py index b8689e3a18f..d53f1aab65e 100755 --- a/xarray/util/print_versions.py +++ b/xarray/util/print_versions.py @@ -129,6 +129,7 @@ def show_versions(file=sys.stdout): ("pip", lambda mod: mod.__version__), ("conda", lambda mod: mod.__version__), ("pytest", lambda mod: mod.__version__), + ("mypy", lambda mod: importlib.metadata.version(mod.__name__)), # Misc. ("IPython", lambda mod: mod.__version__), ("sphinx", lambda mod: mod.__version__),
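
Editor's note (not part of the patch): the resample-related hunks above add `base`, `offset`, and `origin` keywords that mirror `pandas.Series.resample`. The snippet below is a minimal illustrative sketch of the behaviour the new tests (`test_resample_offset`, `test_resample_origin`) exercise; it assumes an environment running this branch of xarray together with pandas and numpy, and it is adapted directly from those tests rather than being an independent reference implementation.

import numpy as np
import pandas as pd
import xarray as xr

# Hourly-ish data starting at an "awkward" timestamp, as in the tests above.
times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10)
array = xr.DataArray(np.arange(10), [("time", times)])

# `offset` shifts the bin edges by a timedelta, matching pandas semantics.
offset_mean = array.resample(time="24H", offset=pd.Timedelta("11H")).mean()

# `origin` anchors the bins, e.g. at the first timestamp of the data.
origin_mean = array.resample(time="24H", origin="start").mean()

# Both are expected to agree with the equivalent pandas resample on the
# underlying series, which is exactly what the new tests assert.
expected = xr.DataArray(array.to_series().resample("24H", origin="start").mean())
xr.testing.assert_identical(expected, origin_mean)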