From 2c68a8b851581773733ab75dfd36590b528635e3 Mon Sep 17 00:00:00 2001 From: Thomas Vogt <57705593+tovogt@users.noreply.github.com> Date: Sun, 10 Apr 2022 16:45:38 +0200 Subject: [PATCH 1/7] Don't overwrite user-defined coordinates attributes (#6366) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- xarray/conventions.py | 2 +- xarray/tests/test_conventions.py | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/xarray/conventions.py b/xarray/conventions.py index ae915069947..ebe95b62721 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -770,7 +770,7 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): # this will copy coordinates from encoding to attrs if "coordinates" in attrs # after the next line, "coordinates" is never in encoding # we get support for attrs["coordinates"] for free. - coords_str = pop_to(encoding, attrs, "coordinates") + coords_str = pop_to(encoding, attrs, "coordinates") or attrs.get("coordinates") if not coords_str and variable_coordinates[name]: coordinates_text = " ".join( str(coord_name) diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 83e560e7208..d5d8e00d45f 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -128,6 +128,25 @@ def test_multidimensional_coordinates(self) -> None: # Should not have any global coordinates. assert "coordinates" not in attrs + def test_var_with_coord_attr(self) -> None: + # regression test for GH6310 + # don't overwrite user-defined "coordinates" attributes + orig = Dataset( + {"values": ("time", np.zeros(2), {"coordinates": "time lon lat"})}, + coords={ + "time": ("time", np.zeros(2)), + "lat": ("time", np.zeros(2)), + "lon": ("time", np.zeros(2)), + }, + ) + # Encode the coordinates, as they would be in a netCDF output file. + enc, attrs = conventions.encode_dataset_coordinates(orig) + # Make sure we have the right coordinates for each variable. + values_coords = enc["values"].attrs.get("coordinates", "") + assert set(values_coords.split()) == {"time", "lat", "lon"} + # Should not have any global coordinates. + assert "coordinates" not in attrs + def test_do_not_overwrite_user_coordinates(self) -> None: orig = Dataset( coords={"x": [0, 1, 2], "y": ("x", [5, 6, 7]), "z": ("x", [8, 9, 10])}, From 79f9c7bcc2bbfdc492d24f690e85864e44f6115b Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Sun, 10 Apr 2022 12:16:05 -0700 Subject: [PATCH 2/7] Try waking up stalebot (#6463) We're almost at the 1000 mark of issues, and when browsing I see a non-trivial number that look stale. I'm not sure why stalebot doesn't seem to be awake; I'll try this and see if it helps --- .github/stale.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/stale.yml b/.github/stale.yml index f4057844d01..bb8b88505c1 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -1,7 +1,7 @@ # Configuration for probot-stale - https://github.com/probot/stale # Number of days of inactivity before an Issue or Pull Request becomes stale -daysUntilStale: 700 # start with a large number and reduce shortly +daysUntilStale: 600 # start with a large number and reduce shortly # Number of days of inactivity before an Issue or Pull Request with the stale label is closed. # Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. @@ -31,6 +31,9 @@ markComment: | If this issue remains relevant, please comment here or remove the `stale` label; otherwise it will be marked as closed automatically +closeComment: | + The stalebot didn't hear anything for a while, so it closed this. Please reopen if this is still an issue. + # Comment to post when removing the stale label. # unmarkComment: > # Your comment here. @@ -40,8 +43,7 @@ markComment: | # Your comment here. # Limit the number of actions per hour, from 1-30. Default is 30 -limitPerRun: 1 # start with a small number - +limitPerRun: 2 # start with a small number # Limit to only `issues` or `pulls` # only: issues From 18039533d85a42939494505d5cf2ae96d3258d79 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Apr 2022 08:41:51 -0600 Subject: [PATCH 3/7] Bump actions/download-artifact from 2 to 3 (#6469) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/pypi-release.yaml | 4 ++-- .github/workflows/upstream-dev-ci.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index c88cf556a50..9af938d2ecb 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -54,7 +54,7 @@ jobs: name: Install Python with: python-version: 3.8 - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v3 with: name: releases path: dist @@ -85,7 +85,7 @@ jobs: if: github.event_name == 'release' runs-on: ubuntu-latest steps: - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v3 with: name: releases path: dist diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index 6091306ed8b..2283db6a6d9 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -114,7 +114,7 @@ jobs: - uses: actions/setup-python@v3 with: python-version: "3.x" - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v3 with: path: /tmp/workspace/logs - name: Move all log files into a single directory From 36a5093bdae405b2550657913a1d1e0ffc92d3ca Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Apr 2022 08:42:13 -0600 Subject: [PATCH 4/7] Bump codecov/codecov-action from 2.1.0 to 3.0.0 (#6470) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ci-additional.yaml | 2 +- .github/workflows/ci.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index ef1666359fe..f2542ab52d5 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -109,7 +109,7 @@ jobs: $PYTEST_EXTRA_FLAGS - name: Upload code coverage to Codecov - uses: codecov/codecov-action@v2.1.0 + uses: codecov/codecov-action@v3.0.0 with: file: ./coverage.xml flags: unittests,${{ matrix.env }} diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 205265b8c54..c07d2350e09 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -104,7 +104,7 @@ jobs: path: pytest.xml - name: Upload code coverage to Codecov - uses: codecov/codecov-action@v2.1.0 + uses: codecov/codecov-action@v3.0.0 with: file: ./coverage.xml flags: unittests From 2a6392b645029100de748fd2823f62e70d80f0c8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Apr 2022 08:42:22 -0600 Subject: [PATCH 5/7] Bump actions/upload-artifact from 2 to 3 (#6468) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/benchmarks.yml | 2 +- .github/workflows/ci.yaml | 4 ++-- .github/workflows/pypi-release.yaml | 2 +- .github/workflows/upstream-dev-ci.yaml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 6d482445f96..034ffee40ad 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -67,7 +67,7 @@ jobs: cp benchmarks/README_CI.md benchmarks.log .asv/results/ working-directory: ${{ env.ASV_DIR }} - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 if: always() with: name: asv-benchmark-results-${{ runner.os }} diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c07d2350e09..a5c1a2de5ad 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -98,7 +98,7 @@ jobs: - name: Upload test results if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: Test results for ${{ runner.os }}-${{ matrix.python-version }} path: pytest.xml @@ -118,7 +118,7 @@ jobs: if: github.repository == 'pydata/xarray' steps: - name: Upload - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: Event File path: ${{ github.event_path }} diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml index 9af938d2ecb..9cad271ce6f 100644 --- a/.github/workflows/pypi-release.yaml +++ b/.github/workflows/pypi-release.yaml @@ -41,7 +41,7 @@ jobs: else echo "✅ Looks good" fi - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: name: releases path: dist diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index 2283db6a6d9..81d1c7db4b8 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -92,7 +92,7 @@ jobs: && steps.status.outcome == 'failure' && github.event_name == 'schedule' && github.repository == 'pydata/xarray' - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: output-${{ matrix.python-version }}-log path: output-${{ matrix.python-version }}-log From ec13944bbd4022614491b6ec479ff2618da14ba8 Mon Sep 17 00:00:00 2001 From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com> Date: Mon, 11 Apr 2022 15:36:40 -0400 Subject: [PATCH 6/7] Support **kwargs form in `.chunk()` (#6471) * updated dataset chunk test * add kwargs form to dataset * updated dataarray chunk test * add kwargs form to dataarray * copied FutureWarning to DataArray.chunk * add tests for existing chunk functionality on Variable * updated variable chunk test * add kwargs form to variable * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * whatsnew * handle empty dict in either_dict_or_kwargs * Change numbers.Number to float Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- doc/whats-new.rst | 3 +++ xarray/core/dataarray.py | 18 +++++++++++++++++- xarray/core/dataset.py | 10 ++++++++-- xarray/core/utils.py | 2 +- xarray/core/variable.py | 24 ++++++++++++++++++++++-- xarray/tests/test_dataarray.py | 5 +++++ xarray/tests/test_dataset.py | 5 ++++- xarray/tests/test_variable.py | 34 ++++++++++++++++++++++++++++++++++ 8 files changed, 94 insertions(+), 7 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c84a0549774..1341708e5f8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -36,6 +36,9 @@ New Features elements which trigger summarization rather than full repr in (numpy) array detailed views of the html repr (:pull:`6400`). By `Benoît Bovy `_. +- Allow passing chunks in **kwargs form to :py:meth:`Dataset.chunk`, :py:meth:`DataArray.chunk`, and + :py:meth:`Variable.chunk`. (:pull:`6471`) + By `Tom Nicholas `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index df1e096b021..49ba2e71665 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1113,6 +1113,7 @@ def chunk( name_prefix: str = "xarray-", token: str = None, lock: bool = False, + **chunks_kwargs: Any, ) -> DataArray: """Coerce this array's data into a dask arrays with the given chunks. @@ -1136,13 +1137,28 @@ def chunk( lock : optional Passed on to :py:func:`dask.array.from_array`, if the array is not already as dask array. + **chunks_kwargs : {dim: chunks, ...}, optional + The keyword arguments form of ``chunks``. + One of chunks or chunks_kwargs must be provided. Returns ------- chunked : xarray.DataArray """ - if isinstance(chunks, (tuple, list)): + if chunks is None: + warnings.warn( + "None value for 'chunks' is deprecated. " + "It will raise an error in the future. Use instead '{}'", + category=FutureWarning, + ) + chunks = {} + + if isinstance(chunks, (Number, str, int)): + chunks = dict.fromkeys(self.dims, chunks) + elif isinstance(chunks, (tuple, list)): chunks = dict(zip(self.dims, chunks)) + else: + chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") ds = self._to_temp_dataset().chunk( chunks, name_prefix=name_prefix, token=token, lock=lock diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 6ff7a1f76e9..2c67cd665ca 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1994,6 +1994,7 @@ def chunk( name_prefix: str = "xarray-", token: str = None, lock: bool = False, + **chunks_kwargs: Any, ) -> Dataset: """Coerce all arrays in this dataset into dask arrays with the given chunks. @@ -2007,7 +2008,7 @@ def chunk( Parameters ---------- - chunks : int, "auto" or mapping of hashable to int, optional + chunks : int, tuple of int, "auto" or mapping of hashable to int, optional Chunk sizes along each dimension, e.g., ``5``, ``"auto"``, or ``{"x": 5, "y": 5}``. name_prefix : str, optional @@ -2017,6 +2018,9 @@ def chunk( lock : optional Passed on to :py:func:`dask.array.from_array`, if the array is not already as dask array. + **chunks_kwargs : {dim: chunks, ...}, optional + The keyword arguments form of ``chunks``. + One of chunks or chunks_kwargs must be provided Returns ------- @@ -2028,7 +2032,7 @@ def chunk( Dataset.chunksizes xarray.unify_chunks """ - if chunks is None: + if chunks is None and chunks_kwargs is None: warnings.warn( "None value for 'chunks' is deprecated. " "It will raise an error in the future. Use instead '{}'", @@ -2038,6 +2042,8 @@ def chunk( if isinstance(chunks, (Number, str, int)): chunks = dict.fromkeys(self.dims, chunks) + else: + chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") bad_dims = chunks.keys() - self.dims.keys() if bad_dims: diff --git a/xarray/core/utils.py b/xarray/core/utils.py index a0f5bfdcf27..efdbe10d5ef 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -266,7 +266,7 @@ def either_dict_or_kwargs( kw_kwargs: Mapping[str, T], func_name: str, ) -> Mapping[Hashable, T]: - if pos_kwargs is None: + if pos_kwargs is None or pos_kwargs == {}: # Need an explicit cast to appease mypy due to invariance; see # https://github.com/python/mypy/issues/6228 return cast(Mapping[Hashable, T], kw_kwargs) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index a21cf8c2d97..05c70390b46 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -5,7 +5,7 @@ import numbers import warnings from datetime import timedelta -from typing import TYPE_CHECKING, Any, Hashable, Mapping, Sequence +from typing import TYPE_CHECKING, Any, Hashable, Literal, Mapping, Sequence import numpy as np import pandas as pd @@ -1012,7 +1012,19 @@ def chunksizes(self) -> Mapping[Any, tuple[int, ...]]: _array_counter = itertools.count() - def chunk(self, chunks={}, name=None, lock=False): + def chunk( + self, + chunks: ( + int + | Literal["auto"] + | tuple[int, ...] + | tuple[tuple[int, ...], ...] + | Mapping[Any, None | int | tuple[int, ...]] + ) = {}, + name: str = None, + lock: bool = False, + **chunks_kwargs: Any, + ) -> Variable: """Coerce this array's data into a dask array with the given chunks. If this variable is a non-dask array, it will be converted to dask @@ -1034,6 +1046,9 @@ def chunk(self, chunks={}, name=None, lock=False): lock : optional Passed on to :py:func:`dask.array.from_array`, if the array is not already as dask array. + **chunks_kwargs : {dim: chunks, ...}, optional + The keyword arguments form of ``chunks``. + One of chunks or chunks_kwargs must be provided. Returns ------- @@ -1049,6 +1064,11 @@ def chunk(self, chunks={}, name=None, lock=False): ) chunks = {} + if isinstance(chunks, (float, str, int, tuple, list)): + pass # dask.array.from_array can handle these directly + else: + chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") + if utils.is_dict_like(chunks): chunks = {self.get_axis_num(dim): chunk for dim, chunk in chunks.items()} diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 65efb3a732c..b8c9edd7258 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -804,6 +804,11 @@ def test_chunk(self): assert isinstance(blocked.data, da.Array) assert "testname_" in blocked.data.name + # test kwargs form of chunks + blocked = unblocked.chunk(dim_0=3, dim_1=3) + assert blocked.chunks == ((3,), (3, 1)) + assert blocked.data.name != first_dask_name + def test_isel(self): assert_identical(self.dv[0], self.dv.isel(x=0)) assert_identical(self.dv, self.dv.isel(x=slice(None))) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 5f368375fc0..25adda2df84 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -921,6 +921,9 @@ def test_chunk(self): expected_chunks = {"dim1": (8,), "dim2": (9,), "dim3": (10,)} assert reblocked.chunks == expected_chunks + # test kwargs form of chunks + assert data.chunk(**expected_chunks).chunks == expected_chunks + def get_dask_names(ds): return {k: v.data.name for k, v in ds.items()} @@ -947,7 +950,7 @@ def get_dask_names(ds): new_dask_names = get_dask_names(reblocked) assert reblocked.chunks == expected_chunks assert_identical(reblocked, data) - # recuhnking with same chunk sizes should not change names + # rechunking with same chunk sizes should not change names for k, v in new_dask_names.items(): assert v == orig_dask_names[k] diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index b8e2f6f4582..0168f19b921 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -2154,6 +2154,40 @@ def test_coarsen_keep_attrs(self, operation="mean"): class TestVariableWithDask(VariableSubclassobjects): cls = staticmethod(lambda *args: Variable(*args).chunk()) + def test_chunk(self): + unblocked = Variable(["dim_0", "dim_1"], np.ones((3, 4))) + assert unblocked.chunks is None + + blocked = unblocked.chunk() + assert blocked.chunks == ((3,), (4,)) + first_dask_name = blocked.data.name + + blocked = unblocked.chunk(chunks=((2, 1), (2, 2))) + assert blocked.chunks == ((2, 1), (2, 2)) + assert blocked.data.name != first_dask_name + + blocked = unblocked.chunk(chunks=(3, 3)) + assert blocked.chunks == ((3,), (3, 1)) + assert blocked.data.name != first_dask_name + + # name doesn't change when rechunking by same amount + # this fails if ReprObject doesn't have __dask_tokenize__ defined + assert unblocked.chunk(2).data.name == unblocked.chunk(2).data.name + + assert blocked.load().chunks is None + + # Check that kwargs are passed + import dask.array as da + + blocked = unblocked.chunk(name="testname_") + assert isinstance(blocked.data, da.Array) + assert "testname_" in blocked.data.name + + # test kwargs form of chunks + blocked = unblocked.chunk(dim_0=3, dim_1=3) + assert blocked.chunks == ((3,), (3, 1)) + assert blocked.data.name != first_dask_name + @pytest.mark.xfail def test_0d_object_array_with_list(self): super().test_0d_object_array_with_list() From c12cb3d11789bbc3660ac43ed1c7705410beb93d Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Mon, 11 Apr 2022 14:07:51 -0700 Subject: [PATCH 7/7] Fix `Number` import (#6474) --- xarray/core/dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 49ba2e71665..2cf78fa7c61 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1153,7 +1153,7 @@ def chunk( ) chunks = {} - if isinstance(chunks, (Number, str, int)): + if isinstance(chunks, (float, str, int)): chunks = dict.fromkeys(self.dims, chunks) elif isinstance(chunks, (tuple, list)): chunks = dict(zip(self.dims, chunks))