diff --git a/.git_archival.txt b/.git_archival.txt index 95cb3eea4e3..8fb235d7045 100644 --- a/.git_archival.txt +++ b/.git_archival.txt @@ -1 +1,4 @@ +node: $Format:%H$ +node-date: $Format:%cI$ +describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$ ref-names: $Format:%D$ diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 935729c055f..bdae56ae6db 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -119,7 +119,7 @@ jobs: python xarray/util/print_versions.py - name: Install mypy run: | - python -m pip install mypy + python -m pip install 'mypy<0.990' - name: Run mypy run: | @@ -134,6 +134,64 @@ jobs: name: codecov-umbrella fail_ci_if_error: false + mypy38: + name: Mypy 3.8 + runs-on: "ubuntu-latest" + needs: detect-ci-trigger + # temporarily skipping due to https://github.com/pydata/xarray/issues/6551 + if: needs.detect-ci-trigger.outputs.triggered == 'false' + defaults: + run: + shell: bash -l {0} + env: + CONDA_ENV_FILE: ci/requirements/environment.yml + PYTHON_VERSION: "3.8" + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 # Fetch all history for all branches and tags. + + - name: set environment variables + run: | + echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV + - name: Setup micromamba + uses: mamba-org/provision-with-micromamba@v14 + with: + environment-file: ${{env.CONDA_ENV_FILE}} + environment-name: xarray-tests + extra-specs: | + python=${{env.PYTHON_VERSION}} + conda + cache-env: true + cache-env-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" + - name: Install xarray + run: | + python -m pip install --no-deps -e . + - name: Version info + run: | + conda info -a + conda list + python xarray/util/print_versions.py + - name: Install mypy + run: | + python -m pip install 'mypy<0.990' + + - name: Run mypy + run: | + python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report + + - name: Upload mypy coverage to Codecov + uses: codecov/codecov-action@v3.1.1 + with: + file: mypy_report/cobertura.xml + flags: mypy38 + env_vars: PYTHON_VERSION + name: codecov-umbrella + fail_ci_if_error: false + + + min-version-policy: name: Minimum Version Policy runs-on: "ubuntu-latest" diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml index 8c74b6988c0..68bd0c15067 100644 --- a/.github/workflows/upstream-dev-ci.yaml +++ b/.github/workflows/upstream-dev-ci.yaml @@ -80,6 +80,7 @@ jobs: if: success() id: status run: | + export ZARR_V3_EXPERIMENTAL_API=1 python -m pytest --timeout=60 -rf \ --report-log output-${{ matrix.python-version }}-log.jsonl - name: Generate and publish the report @@ -88,6 +89,6 @@ jobs: && steps.status.outcome == 'failure' && github.event_name == 'schedule' && github.repository_owner == 'pydata' - uses: xarray-contrib/issue-from-pytest-log@v1.1 + uses: xarray-contrib/issue-from-pytest-log@v1 with: log-path: output-${{ matrix.python-version }}-log.jsonl diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index da02b230730..67dd54faf3a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ # https://pre-commit.com/ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v4.4.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -10,7 +10,7 @@ repos: - id: mixed-line-ending # This wants to go before isort & flake8 - repo: https://github.com/PyCQA/autoflake - rev: 
"v1.7.7" + rev: "v2.0.0" hooks: - id: autoflake # isort should run before black as black sometimes tweaks the isort output args: ["--in-place", "--ignore-init-module-imports"] @@ -19,7 +19,7 @@ repos: hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: v3.2.0 + rev: v3.2.2 hooks: - id: pyupgrade args: @@ -31,14 +31,14 @@ repos: - id: black - id: black-jupyter - repo: https://github.com/keewis/blackdoc - rev: v0.3.7 + rev: v0.3.8 hooks: - id: blackdoc exclude: "generate_aggregations.py" additional_dependencies: ["black==22.10.0"] - id: blackdoc-autoupdate-black - repo: https://github.com/PyCQA/flake8 - rev: 5.0.4 + rev: 6.0.0 hooks: - id: flake8 # - repo: https://github.com/Carreau/velin @@ -47,7 +47,7 @@ repos: # - id: velin # args: ["--write", "--compact"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.982 + rev: v0.991 hooks: - id: mypy # Copied from setup.cfg diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 5de6d6a4f76..b43e4d1e4e8 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -58,8 +58,7 @@ // "pip+emcee": [""], // emcee is only available for install with pip. // }, "matrix": { - "setuptools_scm[toml]": [""], // GH6609 - "setuptools_scm_git_archive": [""], // GH6609 + "setuptools_scm": [""], // GH6609 "numpy": [""], "pandas": [""], "netcdf4": [""], diff --git a/asv_bench/benchmarks/renaming.py b/asv_bench/benchmarks/renaming.py new file mode 100644 index 00000000000..3ade5d8df70 --- /dev/null +++ b/asv_bench/benchmarks/renaming.py @@ -0,0 +1,27 @@ +import numpy as np + +import xarray as xr + + +class SwapDims: + param_names = ["size"] + params = [[int(1e3), int(1e5), int(1e7)]] + + def setup(self, size: int) -> None: + self.ds = xr.Dataset( + {"a": (("x", "t"), np.ones((size, 2)))}, + coords={ + "x": np.arange(size), + "y": np.arange(size), + "z": np.arange(size), + "x2": ("x", np.arange(size)), + "y2": ("y", np.arange(size)), + "z2": ("z", np.arange(size)), + }, + ) + + def time_swap_dims(self, size: int) -> None: + self.ds.swap_dims({"x": "xn", "y": "yn", "z": "zn"}) + + def time_swap_dims_newindex(self, size: int) -> None: + self.ds.swap_dims({"x": "x2", "y": "y2", "z": "z2"}) diff --git a/ci/min_deps_check.py b/ci/min_deps_check.py index 3cc10c7ef32..23621c55560 100755 --- a/ci/min_deps_check.py +++ b/ci/min_deps_check.py @@ -30,12 +30,12 @@ POLICY_MONTHS = {"python": 24, "numpy": 18} POLICY_MONTHS_DEFAULT = 12 POLICY_OVERRIDE: Dict[str, Tuple[int, int]] = {} -has_errors = False +errors = [] def error(msg: str) -> None: - global has_errors - has_errors = True + global errors + errors.append(msg) print("ERROR:", msg) @@ -48,7 +48,7 @@ def parse_requirements(fname) -> Iterator[Tuple[str, int, int, Optional[int]]]: Yield (package name, major version, minor version, [patch version]) """ - global has_errors + global errors with open(fname) as fh: contents = yaml.safe_load(fh) @@ -157,9 +157,9 @@ def process_pkg( status = "> (!)" delta = relativedelta(datetime.now(), policy_published_actual).normalized() n_months = delta.years * 12 + delta.months - error( - f"Package is too new: {pkg}={req_major}.{req_minor} was " - f"published on {versions[req_major, req_minor]:%Y-%m-%d} " + warning( + f"Package is too new: {pkg}={policy_major}.{policy_minor} was " + f"published on {versions[policy_major, policy_minor]:%Y-%m-%d} " f"which was {n_months} months ago (policy is {policy_months} months)" ) else: @@ -193,13 +193,18 @@ def main() -> None: for pkg, major, minor, patch in parse_requirements(fname) ] - print("Package 
Required Policy Status") + print("\nPackage Required Policy Status") print("----------------- -------------------- -------------------- ------") fmt = "{:17} {:7} ({:10}) {:7} ({:10}) {}" for row in rows: print(fmt.format(*row)) - assert not has_errors + if errors: + print("\nErrors:") + print("-------") + for i, e in enumerate(errors): + print(f"{i+1}. {e}") + sys.exit(1) if __name__ == "__main__": diff --git a/ci/requirements/all-but-dask.yml b/ci/requirements/all-but-dask.yml index a673e4a14c7..d30c94348d0 100644 --- a/ci/requirements/all-but-dask.yml +++ b/ci/requirements/all-but-dask.yml @@ -30,7 +30,6 @@ dependencies: - pip - pseudonetcdf - pydap - # - pynio # not compatible with netCDF4>1.5.3, see #4491 - pytest - pytest-cov - pytest-env diff --git a/ci/requirements/bare-minimum.yml b/ci/requirements/bare-minimum.yml index 7f35ccae987..fe1af1af027 100644 --- a/ci/requirements/bare-minimum.yml +++ b/ci/requirements/bare-minimum.yml @@ -11,5 +11,5 @@ dependencies: - pytest-env - pytest-xdist - numpy=1.20 - - packaging=21.0 + - packaging=21.3 - pandas=1.3 diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 8401e31a8fc..22bfa3543d3 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -30,7 +30,6 @@ dependencies: - pre-commit - pseudonetcdf - pydap - # - pynio # Not available on Windows - pytest - pytest-cov - pytest-env diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 2d71233a610..d6bc8466c76 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -34,7 +34,6 @@ dependencies: - pre-commit - pseudonetcdf - pydap - # - pynio # not compatible with netCDF4>1.5.3, see #4491 - pytest - pytest-cov - pytest-env diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml index 1a2307aee5e..c7e157992f0 100644 --- a/ci/requirements/min-all-deps.yml +++ b/ci/requirements/min-all-deps.yml @@ -8,40 +8,38 @@ dependencies: # When upgrading python, numpy, or pandas, must also change # doc/user-guide/installing.rst, doc/user-guide/plotting.rst and setup.py. - python=3.8 - - boto3=1.18 + - boto3=1.20 - bottleneck=1.3 - - cartopy=0.19 + - cartopy=0.20 - cdms2=3.1 - cfgrib=0.9 - cftime=1.5 - coveralls - - dask-core=2021.09 - - distributed=2021.09 + - dask-core=2021.11 + - distributed=2021.11 - flox=0.5 - h5netcdf=0.11 - # h5py and hdf5 tend to cause conflicrs + # h5py and hdf5 tend to cause conflicts # for e.g. 
hdf5 1.12 conflicts with h5py=3.1 # prioritize bumping other packages instead - - h5py=3.1 - - hdf5=1.10 + - h5py=3.6 + - hdf5=1.12 - hypothesis - iris=3.1 - lxml=4.6 # Optional dep of pydap - - matplotlib-base=3.4 - - nc-time-axis=1.3 + - matplotlib-base=3.5 + - nc-time-axis=1.4 # netcdf follows a 1.major.minor[.patch] convention # (see https://github.com/Unidata/netcdf4-python/issues/1090) - # bumping the netCDF4 version is currently blocked by #4491 - - netcdf4=1.5.3 + - netcdf4=1.5.7 - numba=0.54 - numpy=1.20 - - packaging=21.0 + - packaging=21.3 - pandas=1.3 - - pint=0.17 + - pint=0.18 - pip - - pseudonetcdf=3.1 + - pseudonetcdf=3.2 - pydap=3.2 - # - pynio=1.5.5 - pytest - pytest-cov - pytest-env @@ -51,7 +49,7 @@ dependencies: - seaborn=0.11 - sparse=0.13 - toolz=0.11 - - typing_extensions=3.10 + - typing_extensions=4.0 - zarr=2.10 - pip: - numbagg==0.1 diff --git a/doc/getting-started-guide/installing.rst b/doc/getting-started-guide/installing.rst index d357843cdda..7124b0f87a5 100644 --- a/doc/getting-started-guide/installing.rst +++ b/doc/getting-started-guide/installing.rst @@ -8,7 +8,7 @@ Required dependencies - Python (3.8 or later) - `numpy `__ (1.20 or later) -- `packaging `__ (21.0 or later) +- `packaging `__ (21.3 or later) - `pandas `__ (1.3 or later) .. _optional-dependencies: diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index beab5fc050b..f2d3c6cc9d9 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -162,11 +162,77 @@ To do so, pass a ``group`` keyword argument to the :py:func:`open_dataset` function. The group can be specified as a path-like string, e.g., to access subgroup ``'bar'`` within group ``'foo'`` pass ``'/foo/bar'`` as the ``group`` argument. + In a similar way, the ``group`` keyword argument can be given to the :py:meth:`Dataset.to_netcdf` method to write to a group in a netCDF file. When writing multiple groups in one file, pass ``mode='a'`` to :py:meth:`Dataset.to_netcdf` to ensure that each call does not delete the file. +For example: + +.. ipython:: + :verbatim: + + In [1]: ds1 = xr.Dataset({"a": 0}) + + In [2]: ds2 = xr.Dataset({"b": 1}) + + In [3]: ds1.to_netcdf("file.nc", group="A") + + In [4]: ds2.to_netcdf("file.nc", group="B", mode="a") + +We can verify that two groups have been saved using the ncdump command-line utility. + +.. code:: bash + + $ ncdump file.nc + netcdf file { + + group: A { + variables: + int64 a ; + data: + + a = 0 ; + } // group A + + group: B { + variables: + int64 b ; + data: + + b = 1 ; + } // group B + } + +Either of these groups can be loaded from the file as an independent :py:class:`Dataset` object: + +.. ipython:: + :verbatim: + + In [1]: group1 = xr.open_dataset("file.nc", group="A") + + In [2]: group1 + Out[2]: + + Dimensions: () + Data variables: + a int64 ... + + In [3]: group2 = xr.open_dataset("file.nc", group="B") + + In [4]: group2 + Out[4]: + + Dimensions: () + Data variables: + b int64 ... + +.. note:: + + For native handling of multiple groups with xarray, including I/O, you might be interested in the experimental + `xarray-datatree `_ package. + .. _io.encoding: @@ -1209,6 +1275,10 @@ We recommend installing cfgrib via conda:: Formats supported by PyNIO -------------------------- +.. warning:: + + The PyNIO backend is deprecated_. PyNIO is no longer maintained_. See + Xarray can also read GRIB, HDF4 and other file formats supported by PyNIO_, if PyNIO is installed. To use PyNIO to read such files, supply ``engine='pynio'`` to :py:func:`open_dataset`. 
@@ -1217,12 +1287,9 @@ We recommend installing PyNIO via conda:: conda install -c conda-forge pynio -.. warning:: - - PyNIO is no longer actively maintained and conflicts with netcdf4 > 1.5.3. - The PyNIO backend may be moved outside of xarray in the future. - .. _PyNIO: https://www.pyngl.ucar.edu/Nio.shtml +.. _deprecated: https://github.com/pydata/xarray/issues/4491 +.. _maintained: https://github.com/NCAR/pynio/issues/53 .. _io.PseudoNetCDF: diff --git a/doc/user-guide/reshaping.rst b/doc/user-guide/reshaping.rst index 3a4e7ea88c6..95bf21a71b0 100644 --- a/doc/user-guide/reshaping.rst +++ b/doc/user-guide/reshaping.rst @@ -20,7 +20,7 @@ Reordering dimensions To reorder dimensions on a :py:class:`~xarray.DataArray` or across all variables on a :py:class:`~xarray.Dataset`, use :py:meth:`~xarray.DataArray.transpose`. An -ellipsis (`...`) can be use to represent all other dimensions: +ellipsis (`...`) can be used to represent all other dimensions: .. ipython:: python diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6465c1e88ae..00dbe80485b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,12 +14,79 @@ What's New np.random.seed(123456) +.. _whats-new.2022.11.1: -.. _whats-new.2022.10.1: - -v2022.10.1 (unreleased) +v2022.11.1 (unreleased) ----------------------- +New Features +~~~~~~~~~~~~ +- Enable using `offset` and `origin` arguments in :py:meth:`DataArray.resample` + and :py:meth:`Dataset.resample` (:issue:`7266`, :pull:`7284`). By `Spencer + Clark `_. +- Add experimental support for Zarr's in-progress V3 specification. (:pull:`6475`). + By `Gregory Lee `_ and `Joe Hamman `_. + +Breaking changes +~~~~~~~~~~~~~~~~ + +- The minimum versions of some dependencies were changed (:pull:`7300`): + + ========================== ========= ======== + Package Old New + ========================== ========= ======== + boto 1.18 1.20 + cartopy 0.19 0.20 + distributed 2021.09 2021.11 + dask 2021.09 2021.11 + h5py 3.1 3.6 + hdf5 1.10 1.12 + matplotlib-base 3.4 3.5 + nc-time-axis 1.3 1.4 + netcdf4 1.5.3 1.5.7 + packaging 20.3 21.3 + pint 0.17 0.18 + pseudonetcdf 3.1 3.2 + typing_extensions 3.10 4.0 + ========================== ========= ======== + +Deprecations +~~~~~~~~~~~~ +- The PyNIO backend has been deprecated (:issue:`4491`, :pull:`7301`). + By `Joe Hamman `_. + +Bug fixes +~~~~~~~~~ +- Fix handling of coordinate attributes in :py:func:`where`. (:issue:`7220`, :pull:`7229`) + By `Sam Levang `_. +- Import ``nc_time_axis`` when needed (:issue:`7275`, :pull:`7276`). + By `Michael Niklas `_. +- Fix static typing of :py:meth:`xr.polyval` (:issue:`7312`, :pull:`7315`). + By `Michael Niklas `_. +- Fix multiple reads on fsspec S3 files by resetting file pointer to 0 when reading file streams (:issue:`6813`, :pull:`7304`). + By `David Hoese `_ and `Wei Ji Leong `_. + +Documentation +~~~~~~~~~~~~~ + +- Add example of reading and writing individual groups to a single netCDF file to I/O docs page. (:pull:`7338`) + By `Tom Nicholas `_. + +Internal Changes +~~~~~~~~~~~~~~~~ + + +.. _whats-new.2022.11.0: + +v2022.11.0 (Nov 4, 2022) +------------------------ + +This release brings a number of bugfixes and documentation improvements. Both text and HTML +reprs now have a new "Indexes" section, which we expect will help with development of new +Index objects. This release also features more support for the Python Array API. 
+ +Many thanks to the 16 contributors to this release: Daniel Goman, Deepak Cherian, Illviljan, Jessica Scheick, Justus Magin, Mark Harfouche, Maximilian Roos, Mick, Patrick Naylor, Pierre, Spencer Clark, Stephan Hoyer, Tom Nicholas, Tom White + New Features ~~~~~~~~~~~~ @@ -2534,7 +2601,7 @@ Breaking changes have removed the internal use of the ``OrderedDict`` in favor of Python's builtin ``dict`` object which is now ordered itself. This change will be most obvious when interacting with the ``attrs`` property on Dataset and DataArray objects. - (:issue:`3380`, :pull:`3389`). HBy `Joeamman `_. + (:issue:`3380`, :pull:`3389`). By `Joe Hamman `_. New functions/methods ~~~~~~~~~~~~~~~~~~~~~ diff --git a/pyproject.toml b/pyproject.toml index 271abc0aab1..fec91210aea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,8 +2,7 @@ build-backend = "setuptools.build_meta" requires = [ "setuptools>=42", - "setuptools-scm[toml]>=3.4", - "setuptools-scm-git-archive", + "setuptools-scm>=7", ] [tool.setuptools_scm] @@ -27,6 +26,7 @@ exclude_lines = ["pragma: no cover", "if TYPE_CHECKING"] exclude = 'xarray/util/generate_.*\.py' files = "xarray" show_error_codes = true +warn_unused_ignores = true # Most of the numerical computing stack doesn't have type annotations yet. [[tool.mypy.overrides]] @@ -40,8 +40,6 @@ module = [ "cfgrib.*", "cftime.*", "cupy.*", - "dask.*", - "distributed.*", "fsspec.*", "h5netcdf.*", "h5py.*", @@ -52,11 +50,9 @@ module = [ "Nio.*", "nc_time_axis.*", "numbagg.*", - "numpy.*", "netCDF4.*", "netcdftime.*", "pandas.*", - "pint.*", "pooch.*", "PseudoNetCDF.*", "pydap.*", @@ -70,8 +66,6 @@ module = [ "zarr.*", ] -# version spanning code is hard to type annotate (and most of this module will -# be going away soon anyways) [[tool.mypy.overrides]] ignore_errors = true -module = "xarray.core.pycompat" +module = [] diff --git a/requirements.txt b/requirements.txt index e7015650c8b..4eb5c4a6b67 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,5 +3,5 @@ # https://help.github.com/en/github/visualizing-repository-data-with-graphs/listing-the-packages-that-a-repository-depends-on numpy >= 1.20 -packaging >= 21.0 +packaging >= 21.3 pandas >= 1.3 diff --git a/setup.cfg b/setup.cfg index 72dca2dec63..7919908e8ec 100644 --- a/setup.cfg +++ b/setup.cfg @@ -77,7 +77,7 @@ python_requires = >=3.8 install_requires = numpy >= 1.20 # recommended to use >= 1.22 for full quantile method support pandas >= 1.3 - packaging >= 21.0 + packaging >= 21.3 [options.extras_require] io = @@ -147,11 +147,12 @@ markers = [flake8] ignore = - E203 # whitespace before ':' - doesn't work well with black - E402 # module level import not at top of file - E501 # line too long - let black worry about that - E731 # do not assign a lambda expression, use a def - W503 # line break before binary operator + # E203: whitespace before ':' - doesn't work well with black + # E402: module level import not at top of file + # E501: line too long - let black worry about that + # E731: do not assign a lambda expression, use a def + # W503: line break before binary operator + E203, E402, E501, E731, W503 exclude = .eggs doc diff --git a/xarray/__init__.py b/xarray/__init__.py index 46dcf0e9b32..e920e94ca19 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -40,7 +40,7 @@ from importlib.metadata import version as _version except ImportError: # if the fallback library is missing, we are doomed.
- from importlib_metadata import version as _version # type: ignore[no-redef] + from importlib_metadata import version as _version try: __version__ = _version("xarray") diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 13bcf046ac3..02cf425386b 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -9,6 +9,7 @@ TYPE_CHECKING, Any, Callable, + Dict, Final, Hashable, Iterable, @@ -62,7 +63,7 @@ str, # no nice typing support for custom backends None, ] - T_Chunks = Union[int, dict[Any, Any], Literal["auto"], None] + T_Chunks = Union[int, Dict[Any, Any], Literal["auto"], None] T_NetcdfTypes = Literal[ "NETCDF4", "NETCDF4_CLASSIC", "NETCDF3_64BIT", "NETCDF3_CLASSIC" ] @@ -916,7 +917,7 @@ def open_mfdataset( >>> lon_bnds, lat_bnds = (-110, -105), (40, 45) >>> partial_func = partial(_preprocess, lon_bnds=lon_bnds, lat_bnds=lat_bnds) >>> ds = xr.open_mfdataset( - ... "file_*.nc", concat_dim="time", preprocess=_preprocess + ... "file_*.nc", concat_dim="time", preprocess=partial_func ... ) # doctest: +SKIP References @@ -1503,6 +1504,7 @@ def to_zarr( region: Mapping[str, slice] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, + zarr_version: int | None = None, ) -> backends.ZarrStore: ... @@ -1524,6 +1526,7 @@ def to_zarr( region: Mapping[str, slice] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, + zarr_version: int | None = None, ) -> Delayed: ... @@ -1542,6 +1545,7 @@ def to_zarr( region: Mapping[str, slice] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, + zarr_version: int | None = None, ) -> backends.ZarrStore | Delayed: """This function creates an appropriate datastore for writing a dataset to a zarr ztore @@ -1608,6 +1612,13 @@ def to_zarr( f"``region`` with to_zarr(), got {append_dim} in both" ) + if zarr_version is None: + # default to 2 if store doesn't specify it's version (e.g. 
a path) + zarr_version = int(getattr(store, "_store_version", 2)) + + if consolidated is None and zarr_version > 2: + consolidated = False + if mode == "r+": already_consolidated = consolidated consolidate_on_close = False @@ -1626,6 +1637,7 @@ def to_zarr( write_region=region, safe_chunks=safe_chunks, stacklevel=4, # for Dataset.to_zarr() + zarr_version=zarr_version, ) if mode in ["a", "r+"]: diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 10033587bf1..6a6f54704ac 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -4,7 +4,6 @@ import io import os -import numpy as np from packaging.version import Version from ..core import indexing @@ -46,9 +45,6 @@ def __getitem__(self, key): ) def _getitem(self, key): - # h5py requires using lists for fancy indexing: - # https://github.com/h5py/h5py/issues/992 - key = tuple(list(k) if isinstance(k, np.ndarray) else k for k in key) with self.datastore.lock: array = self.get_array(needs_lock=False) return array[key] diff --git a/xarray/backends/locks.py b/xarray/backends/locks.py index 73144df2b41..1de8fc8f7b8 100644 --- a/xarray/backends/locks.py +++ b/xarray/backends/locks.py @@ -61,7 +61,7 @@ def _get_lock_maker(scheduler=None): try: from dask.distributed import Lock as DistributedLock except ImportError: - DistributedLock = None # type: ignore + DistributedLock = None return DistributedLock else: raise KeyError(scheduler) diff --git a/xarray/backends/lru_cache.py b/xarray/backends/lru_cache.py index 0f7f4c23b2e..c60ef3fc6b3 100644 --- a/xarray/backends/lru_cache.py +++ b/xarray/backends/lru_cache.py @@ -30,7 +30,7 @@ class LRUCache(MutableMapping[K, V]): __slots__ = ("_cache", "_lock", "_maxsize", "_on_evict") - def __init__(self, maxsize: int, on_evict: Callable[[K, V], Any] = None): + def __init__(self, maxsize: int, on_evict: Callable[[K, V], Any] | None = None): """ Parameters ---------- diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 7b37b1a573a..f8b3192a4b9 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -232,11 +232,11 @@ def _extract_nc4_variable_encoding( "shuffle", "_FillValue", "dtype", + "compression", } if lsd_okay: valid_encodings.add("least_significant_digit") if h5py_okay: - valid_encodings.add("compression") valid_encodings.add("compression_opts") if not raise_on_invalid and encoding.get("chunksizes") is not None: diff --git a/xarray/backends/pynio_.py b/xarray/backends/pynio_.py index 07fb2b94673..cb7c65c0432 100644 --- a/xarray/backends/pynio_.py +++ b/xarray/backends/pynio_.py @@ -1,5 +1,7 @@ from __future__ import annotations +import warnings + import numpy as np from ..core import indexing @@ -55,6 +57,12 @@ class NioDataStore(AbstractDataStore): def __init__(self, filename, mode="r", lock=None, **kwargs): import Nio + warnings.warn( + "The PyNIO backend is Deprecated and will be removed from Xarray in a future release. " + "See https://github.com/pydata/xarray/issues/4491 for more information", + DeprecationWarning, + ) + if lock is None: lock = PYNIO_LOCK self.lock = ensure_lock(lock) @@ -94,6 +102,15 @@ def close(self): class PynioBackendEntrypoint(BackendEntrypoint): + """ + PyNIO backend + + .. deprecated:: 0.20.0 + + Deprecated as PyNIO is no longer supported. 
See + https://github.com/pydata/xarray/issues/4491 for more information + """ + available = module_available("Nio") def open_dataset( diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 6c547f82252..cca2d89678f 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -353,6 +353,7 @@ def open_group( write_region=None, safe_chunks=True, stacklevel=2, + zarr_version=None, ): import zarr @@ -360,12 +361,29 @@ def open_group( if isinstance(store, os.PathLike): store = os.fspath(store) + if zarr_version is None: + # default to 2 if store doesn't specify it's version (e.g. a path) + zarr_version = getattr(store, "_store_version", 2) + open_kwargs = dict( mode=mode, synchronizer=synchronizer, path=group, ) open_kwargs["storage_options"] = storage_options + if zarr_version > 2: + open_kwargs["zarr_version"] = zarr_version + + if consolidated or consolidate_on_close: + raise ValueError( + "consolidated metadata has not been implemented for zarr " + f"version {zarr_version} yet. Set consolidated=False for " + f"zarr version {zarr_version}. See also " + "https://github.com/zarr-developers/zarr-specs/issues/136" + ) + + if consolidated is None: + consolidated = False if chunk_store: open_kwargs["chunk_store"] = chunk_store @@ -440,6 +458,11 @@ def open_store_variable(self, name, zarr_array): zarr_array, DIMENSION_KEY, try_nczarr ) attributes = dict(attributes) + + # TODO: this should not be needed once + # https://github.com/zarr-developers/zarr-python/issues/1269 is resolved. + attributes.pop("filters", None) + encoding = { "chunks": zarr_array.chunks, "preferred_chunks": dict(zip(dimensions, zarr_array.chunks)), @@ -668,6 +691,7 @@ def open_zarr( storage_options=None, decode_timedelta=None, use_cftime=None, + zarr_version=None, **kwargs, ): """Load and decode a dataset from a Zarr store. @@ -725,6 +749,9 @@ def open_zarr( capability. Only works for stores that have already been consolidated. By default (`consolidate=None`), attempts to read consolidated metadata, falling back to read non-consolidated metadata if that fails. + + When the experimental ``zarr_version=3``, ``consolidated`` must be + either be ``None`` or ``False``. chunk_store : MutableMapping, optional A separate Zarr store only for chunk data. storage_options : dict, optional @@ -745,6 +772,10 @@ def open_zarr( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. + zarr_version : int or None, optional + The desired zarr spec version to target (currently 2 or 3). The default + of None will attempt to determine the zarr version from ``store`` when + possible, otherwise defaulting to 2. 
Returns ------- @@ -782,6 +813,7 @@ def open_zarr( "chunk_store": chunk_store, "storage_options": storage_options, "stacklevel": 4, + "zarr_version": zarr_version, } ds = open_dataset( @@ -798,6 +830,7 @@ def open_zarr( backend_kwargs=backend_kwargs, decode_timedelta=decode_timedelta, use_cftime=use_cftime, + zarr_version=zarr_version, ) return ds @@ -842,6 +875,7 @@ def open_dataset( chunk_store=None, storage_options=None, stacklevel=3, + zarr_version=None, ): filename_or_obj = _normalize_path(filename_or_obj) @@ -855,6 +889,7 @@ def open_dataset( chunk_store=chunk_store, storage_options=storage_options, stacklevel=stacklevel + 1, + zarr_version=zarr_version, ) store_entrypoint = StoreBackendEntrypoint() diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index a029f39c7b8..04b2d773e2e 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -207,6 +207,10 @@ def __mul__(self, other): return new_self * other return type(self)(n=other * self.n) + def as_timedelta(self): + """All Tick subclasses must implement an as_timedelta method.""" + raise NotImplementedError + def _get_day_of_month(other, day_option): """Find the day in `other`'s month that satisfies a BaseCFTimeOffset's diff --git a/xarray/core/_aggregations.py b/xarray/core/_aggregations.py index b9793e40279..7671be82b36 100644 --- a/xarray/core/_aggregations.py +++ b/xarray/core/_aggregations.py @@ -44,9 +44,9 @@ def count( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -116,9 +116,9 @@ def all( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -188,9 +188,9 @@ def any( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -261,9 +261,9 @@ def max( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -348,9 +348,9 @@ def min( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -435,9 +435,9 @@ def mean( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -527,9 +527,9 @@ def prod( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -634,9 +634,9 @@ def sum( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -741,9 +741,9 @@ def std( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -845,9 +845,9 @@ def var( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -948,9 +948,9 @@ def median( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." 
or None, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1039,9 +1039,9 @@ def cumsum( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1132,9 +1132,9 @@ def cumprod( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1240,9 +1240,9 @@ def count( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -1306,9 +1306,9 @@ def all( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -1372,9 +1372,9 @@ def any( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. keep_attrs : bool or None, optional If True, ``attrs`` will be copied from the original object to the new one. If False, the new object will be @@ -1439,9 +1439,9 @@ def max( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. 
skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1518,9 +1518,9 @@ def min( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1597,9 +1597,9 @@ def mean( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1681,9 +1681,9 @@ def prod( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1778,9 +1778,9 @@ def sum( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1875,9 +1875,9 @@ def std( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1969,9 +1969,9 @@ def var( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -2062,9 +2062,9 @@ def median( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2145,9 +2145,9 @@ def cumsum( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2234,9 +2234,9 @@ def cumprod( Parameters ---------- - dim : str, Iterable of Hashable, or None, default: None + dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2317,7 +2317,7 @@ class DatasetGroupByAggregations: def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -2328,14 +2328,14 @@ def reduce( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, **kwargs: Any, ) -> Dataset: raise NotImplementedError() def count( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -2424,7 +2424,7 @@ def count( def all( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -2513,7 +2513,7 @@ def all( def any( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -2602,7 +2602,7 @@ def any( def max( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -2709,7 +2709,7 @@ def max( def min( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -2816,7 +2816,7 @@ def min( def mean( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -2927,7 +2927,7 @@ def mean( def prod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -3057,7 +3057,7 @@ def prod( def sum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -3187,7 +3187,7 @@ def sum( def std( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -3314,7 +3314,7 @@ def std( def var( self, - 
dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -3441,7 +3441,7 @@ def var( def median( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -3537,7 +3537,7 @@ def median( def cumsum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -3631,7 +3631,7 @@ def cumsum( def cumprod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -3730,7 +3730,7 @@ class DatasetResampleAggregations: def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -3741,14 +3741,14 @@ def reduce( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, **kwargs: Any, ) -> Dataset: raise NotImplementedError() def count( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -3837,7 +3837,7 @@ def count( def all( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -3926,7 +3926,7 @@ def all( def any( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -4015,7 +4015,7 @@ def any( def max( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -4122,7 +4122,7 @@ def max( def min( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -4229,7 +4229,7 @@ def min( def mean( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -4340,7 +4340,7 @@ def mean( def prod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -4470,7 +4470,7 @@ def prod( def sum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -4600,7 +4600,7 @@ def sum( def std( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -4727,7 +4727,7 @@ def std( def var( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -4854,7 +4854,7 @@ def var( def median( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -4950,7 +4950,7 @@ def median( def cumsum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -5044,7 +5044,7 @@ def cumsum( def cumprod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -5143,7 +5143,7 @@ class DataArrayGroupByAggregations: def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -5154,14 +5154,14 @@ def reduce( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, **kwargs: Any, ) -> DataArray: raise NotImplementedError() def count( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -5243,7 +5243,7 @@ def count( def all( self, - dim: Dims | ellipsis = None, + 
dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -5325,7 +5325,7 @@ def all( def any( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -5407,7 +5407,7 @@ def any( def max( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -5505,7 +5505,7 @@ def max( def min( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -5603,7 +5603,7 @@ def min( def mean( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -5705,7 +5705,7 @@ def mean( def prod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -5824,7 +5824,7 @@ def prod( def sum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -5943,7 +5943,7 @@ def sum( def std( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -6059,7 +6059,7 @@ def std( def var( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -6175,7 +6175,7 @@ def var( def median( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -6263,7 +6263,7 @@ def median( def cumsum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -6353,7 +6353,7 @@ def cumsum( def cumprod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -6448,7 +6448,7 @@ class DataArrayResampleAggregations: def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -6459,14 +6459,14 @@ def reduce( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, **kwargs: Any, ) -> DataArray: raise NotImplementedError() def count( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -6548,7 +6548,7 @@ def count( def all( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -6630,7 +6630,7 @@ def all( def any( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, **kwargs: Any, @@ -6712,7 +6712,7 @@ def any( def max( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -6810,7 +6810,7 @@ def max( def min( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -6908,7 +6908,7 @@ def min( def mean( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -7010,7 +7010,7 @@ def mean( def prod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -7129,7 +7129,7 @@ def prod( def sum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, min_count: int | None = None, @@ -7248,7 +7248,7 @@ def sum( def std( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -7364,7 
+7364,7 @@ def std( def var( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, ddof: int = 0, @@ -7480,7 +7480,7 @@ def var( def median( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -7568,7 +7568,7 @@ def median( def cumsum( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, @@ -7658,7 +7658,7 @@ def cumsum( def cumprod( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *, skipna: bool | None = None, keep_attrs: bool | None = None, diff --git a/xarray/core/_typed_ops.pyi b/xarray/core/_typed_ops.pyi index 46af53b1097..98a17a47cd5 100644 --- a/xarray/core/_typed_ops.pyi +++ b/xarray/core/_typed_ops.pyi @@ -77,157 +77,157 @@ class DataArrayOpsMixin: @overload def __add__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __add__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __add__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __add__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __sub__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __sub__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __sub__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __sub__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __mul__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __mul__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __mul__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __mul__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __pow__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __pow__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __pow__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __pow__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __truediv__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __truediv__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __truediv__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __truediv__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __floordiv__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __floordiv__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __floordiv__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __floordiv__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __mod__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __mod__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __mod__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __mod__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __and__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __and__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __and__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __and__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __xor__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __xor__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __xor__(self, other: "DatasetGroupBy") -> "Dataset": ... 
@overload def __xor__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __or__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __or__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __or__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __or__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __lt__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __lt__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __lt__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __lt__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __le__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __le__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __le__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __le__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __gt__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __gt__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __gt__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __gt__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __ge__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __ge__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __ge__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __ge__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload # type: ignore[override] def __eq__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __eq__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __eq__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __eq__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload # type: ignore[override] def __ne__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __ne__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __ne__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __ne__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __radd__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __radd__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __radd__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __radd__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __rsub__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rsub__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __rsub__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __rsub__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __rmul__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rmul__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __rmul__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __rmul__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __rpow__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rpow__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __rpow__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __rpow__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __rtruediv__(self, other: T_Dataset) -> T_Dataset: ... 
@overload - def __rtruediv__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __rtruediv__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __rtruediv__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __rfloordiv__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rfloordiv__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __rfloordiv__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __rfloordiv__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __rmod__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rmod__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __rmod__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __rmod__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __rand__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rand__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __rand__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __rand__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __rxor__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rxor__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __rxor__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __rxor__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... @overload def __ror__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __ror__(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def __ror__(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def __ror__(self: T_DataArray, other: DaCompatible) -> T_DataArray: ... def _inplace_binary_op(self, other, f): ... @@ -417,157 +417,157 @@ class DatasetGroupByOpsMixin: @overload def __add__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __add__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __add__(self, other: "DataArray") -> "Dataset": ... @overload def __add__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __sub__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __sub__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __sub__(self, other: "DataArray") -> "Dataset": ... @overload def __sub__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __mul__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __mul__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __mul__(self, other: "DataArray") -> "Dataset": ... @overload def __mul__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __pow__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __pow__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __pow__(self, other: "DataArray") -> "Dataset": ... @overload def __pow__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __truediv__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __truediv__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __truediv__(self, other: "DataArray") -> "Dataset": ... @overload def __truediv__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __floordiv__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __floordiv__(self, other: "DataArray") -> "Dataset": ... 
# type: ignore[misc] + def __floordiv__(self, other: "DataArray") -> "Dataset": ... @overload def __floordiv__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __mod__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __mod__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __mod__(self, other: "DataArray") -> "Dataset": ... @overload def __mod__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __and__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __and__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __and__(self, other: "DataArray") -> "Dataset": ... @overload def __and__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __xor__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __xor__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __xor__(self, other: "DataArray") -> "Dataset": ... @overload def __xor__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __or__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __or__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __or__(self, other: "DataArray") -> "Dataset": ... @overload def __or__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __lt__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __lt__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __lt__(self, other: "DataArray") -> "Dataset": ... @overload def __lt__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __le__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __le__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __le__(self, other: "DataArray") -> "Dataset": ... @overload def __le__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __gt__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __gt__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __gt__(self, other: "DataArray") -> "Dataset": ... @overload def __gt__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __ge__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __ge__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __ge__(self, other: "DataArray") -> "Dataset": ... @overload def __ge__(self, other: GroupByIncompatible) -> NoReturn: ... @overload # type: ignore[override] def __eq__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __eq__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __eq__(self, other: "DataArray") -> "Dataset": ... @overload def __eq__(self, other: GroupByIncompatible) -> NoReturn: ... @overload # type: ignore[override] def __ne__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __ne__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __ne__(self, other: "DataArray") -> "Dataset": ... @overload def __ne__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __radd__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __radd__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __radd__(self, other: "DataArray") -> "Dataset": ... @overload def __radd__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __rsub__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rsub__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __rsub__(self, other: "DataArray") -> "Dataset": ... 
@overload def __rsub__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __rmul__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rmul__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __rmul__(self, other: "DataArray") -> "Dataset": ... @overload def __rmul__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __rpow__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rpow__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __rpow__(self, other: "DataArray") -> "Dataset": ... @overload def __rpow__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __rtruediv__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rtruediv__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __rtruediv__(self, other: "DataArray") -> "Dataset": ... @overload def __rtruediv__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __rfloordiv__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rfloordiv__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __rfloordiv__(self, other: "DataArray") -> "Dataset": ... @overload def __rfloordiv__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __rmod__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rmod__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __rmod__(self, other: "DataArray") -> "Dataset": ... @overload def __rmod__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __rand__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rand__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __rand__(self, other: "DataArray") -> "Dataset": ... @overload def __rand__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __rxor__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __rxor__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __rxor__(self, other: "DataArray") -> "Dataset": ... @overload def __rxor__(self, other: GroupByIncompatible) -> NoReturn: ... @overload def __ror__(self, other: T_Dataset) -> T_Dataset: ... @overload - def __ror__(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def __ror__(self, other: "DataArray") -> "Dataset": ... @overload def __ror__(self, other: GroupByIncompatible) -> NoReturn: ... diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index e84c4ed2a8a..2c74d2bed1d 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -112,7 +112,7 @@ def _apply_str_ufunc( obj: Any, dtype: DTypeLike = None, output_core_dims: list | tuple = ((),), - output_sizes: Mapping[Any, int] = None, + output_sizes: Mapping[Any, int] | None = None, func_args: tuple = (), func_kwargs: Mapping = {}, ) -> Any: @@ -222,7 +222,7 @@ def _apply( func: Callable, dtype: DTypeLike = None, output_core_dims: list | tuple = ((),), - output_sizes: Mapping[Any, int] = None, + output_sizes: Mapping[Any, int] | None = None, func_args: tuple = (), func_kwargs: Mapping = {}, ) -> T_DataArray: @@ -850,7 +850,7 @@ def isupper(self) -> T_DataArray: return self._apply(func=lambda x: x.isupper(), dtype=bool) def count( - self, pat: str | bytes | Pattern | Any, flags: int = 0, case: bool = None + self, pat: str | bytes | Pattern | Any, flags: int = 0, case: bool | None = None ) -> T_DataArray: """ Count occurrences of pattern in each string of the array. 
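As an illustration of the `count` signature touched above (the `case` argument is now annotated as `bool | None`), here is a minimal usage sketch; the array contents and pattern are invented for the example and are not part of the patch:

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.array(["abab", "Abc", ""], dtype=np.str_), dims="x")

    # case is left as None here, which the string accessor treats as
    # case-sensitive matching for plain string patterns
    print(da.str.count("ab").values)  # [2 0 0]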
@@ -1097,7 +1097,7 @@ def zfill(self, width: int | Any) -> T_DataArray: def contains( self, pat: str | bytes | Pattern | Any, - case: bool = None, + case: bool | None = None, flags: int = 0, regex: bool = True, ) -> T_DataArray: @@ -1170,7 +1170,7 @@ def func(x, ipat): def match( self, pat: str | bytes | Pattern | Any, - case: bool = None, + case: bool | None = None, flags: int = 0, ) -> T_DataArray: """ @@ -1531,7 +1531,7 @@ def replace( pat: str | bytes | Pattern | Any, repl: str | bytes | Callable | Any, n: int | Any = -1, - case: bool = None, + case: bool | None = None, flags: int = 0, regex: bool = True, ) -> T_DataArray: @@ -1603,7 +1603,7 @@ def extract( self, pat: str | bytes | Pattern | Any, dim: Hashable, - case: bool = None, + case: bool | None = None, flags: int = 0, ) -> T_DataArray: r""" @@ -1748,7 +1748,7 @@ def extractall( pat: str | bytes | Pattern | Any, group_dim: Hashable, match_dim: Hashable, - case: bool = None, + case: bool | None = None, flags: int = 0, ) -> T_DataArray: r""" @@ -1921,7 +1921,7 @@ def _get_res(val, ipat, imaxcount=maxcount, dtype=self._obj.dtype): def findall( self, pat: str | bytes | Pattern | Any, - case: bool = None, + case: bool | None = None, flags: int = 0, ) -> T_DataArray: r""" diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index df6f541c703..ef30d9afe85 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -141,10 +141,10 @@ def __init__( self, objects: Iterable[DataAlignable], join: str = "inner", - indexes: Mapping[Any, Any] = None, + indexes: Mapping[Any, Any] | None = None, exclude_dims: Iterable = frozenset(), exclude_vars: Iterable[Hashable] = frozenset(), - method: str = None, + method: str | None = None, tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value: Any = dtypes.NA, @@ -474,7 +474,7 @@ def override_indexes(self) -> None: if obj_idx is not None: for name, var in self.aligned_index_vars[key].items(): new_indexes[name] = aligned_idx - new_variables[name] = var.copy() + new_variables[name] = var.copy(deep=self.copy) objects[i + 1] = obj._overwrite_indexes(new_indexes, new_variables) @@ -490,7 +490,7 @@ def _get_dim_pos_indexers( obj_idx = matching_indexes.get(key) if obj_idx is not None: if self.reindex[key]: - indexers = obj_idx.reindex_like(aligned_idx, **self.reindex_kwargs) # type: ignore[call-arg] + indexers = obj_idx.reindex_like(aligned_idx, **self.reindex_kwargs) dim_pos_indexers.update(indexers) return dim_pos_indexers @@ -514,7 +514,7 @@ def _get_indexes_and_vars( if obj_idx is not None: for name, var in index_vars.items(): new_indexes[name] = aligned_idx - new_variables[name] = var.copy() + new_variables[name] = var.copy(deep=self.copy) return new_indexes, new_variables @@ -853,7 +853,7 @@ def is_alignable(obj): def reindex( obj: DataAlignable, indexers: Mapping[Any, Any], - method: str = None, + method: str | None = None, tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value: Any = dtypes.NA, @@ -892,7 +892,7 @@ def reindex( def reindex_like( obj: DataAlignable, other: Dataset | DataArray, - method: str = None, + method: str | None = None, tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value: Any = dtypes.NA, diff --git a/xarray/core/combine.py b/xarray/core/combine.py index fe4178eca61..f474d3beb19 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -377,7 +377,7 @@ def _nested_combine( # Define type for arbitrarily-nested list of lists recursively # 
Currently mypy cannot handle this but other linters can (https://stackoverflow.com/a/53845083/3154101) -DATASET_HYPERCUBE = Union[Dataset, Iterable["DATASET_HYPERCUBE"]] # type: ignore +DATASET_HYPERCUBE = Union[Dataset, Iterable["DATASET_HYPERCUBE"]] # type: ignore[misc] def combine_nested( @@ -669,7 +669,7 @@ def combine_by_coords( fill_value: object = dtypes.NA, join: JoinOptions = "outer", combine_attrs: CombineAttrsOptions = "no_conflicts", - datasets: Iterable[Dataset] = None, + datasets: Iterable[Dataset] | None = None, ) -> Dataset | DataArray: """ diff --git a/xarray/core/common.py b/xarray/core/common.py index 13fd91d8e99..d1387d62e99 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -44,7 +44,13 @@ from .indexes import Index from .resample import Resample from .rolling_exp import RollingExp - from .types import DTypeLikeSave, ScalarOrArray, SideOptions, T_DataWithCoords + from .types import ( + DatetimeLike, + DTypeLikeSave, + ScalarOrArray, + SideOptions, + T_DataWithCoords, + ) from .variable import Variable DTypeMaybeMapping = Union[DTypeLikeSave, Mapping[Any, DTypeLikeSave]] @@ -770,7 +776,7 @@ def pipe( def rolling_exp( self: T_DataWithCoords, - window: Mapping[Any, int] = None, + window: Mapping[Any, int] | None = None, window_type: str = "span", **window_kwargs, ) -> RollingExp[T_DataWithCoords]: @@ -817,7 +823,9 @@ def _resample( skipna: bool | None, closed: SideOptions | None, label: SideOptions | None, - base: int, + base: int | None, + offset: pd.Timedelta | datetime.timedelta | str | None, + origin: str | DatetimeLike, keep_attrs: bool | None, loffset: datetime.timedelta | str | None, restore_coord_dims: bool | None, @@ -845,6 +853,18 @@ def _resample( For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for "24H" frequency, base could range from 0 through 23. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : pd.Timedelta, datetime.timedelta, or str, default is None + An offset timedelta added to the origin. loffset : timedelta or str, optional Offset used to adjust the resampled time labels. Some pandas date offset strings are supported. 
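The docstring added above describes the new `origin` and `offset` resampling keywords that supersede `base`. A minimal sketch of how they could be used once this patch is applied; the timestamps, frequency and offset are invented for illustration:

    import numpy as np
    import pandas as pd
    import xarray as xr

    times = pd.date_range("2000-01-01 02:17", periods=48, freq="30min")
    da = xr.DataArray(np.arange(48.0), coords={"time": times}, dims="time")

    # instead of base=..., anchor the 3-hourly bins at the Unix epoch and then
    # shift every bin edge by a fixed timedelta
    means = da.resample(time="3H", origin="epoch", offset=pd.Timedelta("15min")).mean()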
@@ -960,10 +980,24 @@ def _resample( if isinstance(self._indexes[dim_name].to_pandas_index(), CFTimeIndex): from .resample_cftime import CFTimeGrouper - grouper = CFTimeGrouper(freq, closed, label, base, loffset) + grouper = CFTimeGrouper( + freq=freq, + closed=closed, + label=label, + base=base, + loffset=loffset, + origin=origin, + offset=offset, + ) else: grouper = pd.Grouper( - freq=freq, closed=closed, label=label, base=base, loffset=loffset + freq=freq, + closed=closed, + label=label, + base=base, + offset=offset, + origin=origin, + loffset=loffset, ) group = DataArray( dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 6ec38453a4b..41d529b1093 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -846,7 +846,7 @@ def apply_array_ufunc(func, *args, dask="forbidden"): def apply_ufunc( func: Callable, *args: Any, - input_core_dims: Sequence[Sequence] = None, + input_core_dims: Sequence[Sequence] | None = None, output_core_dims: Sequence[Sequence] | None = ((),), exclude_dims: AbstractSet = frozenset(), vectorize: bool = False, @@ -1734,7 +1734,7 @@ def dot( dim_counts.update(arr.dims) dims = tuple(d for d, c in dim_counts.items() if c > 1) - dot_dims: set[Hashable] = set(dims) # type:ignore[arg-type] + dot_dims: set[Hashable] = set(dims) # dimensions to be parallelized broadcast_dims = common_dims - dot_dims @@ -1855,15 +1855,13 @@ def where(cond, x, y, keep_attrs=None): Dataset.where, DataArray.where : equivalent methods """ + from .dataset import Dataset + if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) - if keep_attrs is True: - # keep the attributes of x, the second parameter, by default to - # be consistent with the `where` method of `DataArray` and `Dataset` - keep_attrs = lambda attrs, context: getattr(x, "attrs", {}) # alignment for three arguments is complicated, so don't support it yet - return apply_ufunc( + result = apply_ufunc( duck_array_ops.where, cond, x, @@ -1874,24 +1872,53 @@ def where(cond, x, y, keep_attrs=None): keep_attrs=keep_attrs, ) + # keep the attributes of x, the second parameter, by default to + # be consistent with the `where` method of `DataArray` and `Dataset` + # rebuild the attrs from x at each level of the output, which could be + # Dataset, DataArray, or Variable, and also handle coords + if keep_attrs is True: + if isinstance(y, Dataset) and not isinstance(x, Dataset): + # handle special case where x gets promoted to Dataset + result.attrs = {} + if getattr(x, "name", None) in result.data_vars: + result[x.name].attrs = getattr(x, "attrs", {}) + else: + # otherwise, fill in global attrs and variable attrs (if they exist) + result.attrs = getattr(x, "attrs", {}) + for v in getattr(result, "data_vars", []): + result[v].attrs = getattr(getattr(x, v, None), "attrs", {}) + for c in getattr(result, "coords", []): + # always fill coord attrs of x + result[c].attrs = getattr(getattr(x, c, None), "attrs", {}) + + return result + @overload -def polyval(coord: DataArray, coeffs: DataArray, degree_dim: Hashable) -> DataArray: +def polyval( + coord: DataArray, coeffs: DataArray, degree_dim: Hashable = "degree" +) -> DataArray: ... @overload -def polyval(coord: DataArray, coeffs: Dataset, degree_dim: Hashable) -> Dataset: +def polyval( + coord: DataArray, coeffs: Dataset, degree_dim: Hashable = "degree" +) -> Dataset: ... 
@overload -def polyval(coord: Dataset, coeffs: DataArray, degree_dim: Hashable) -> Dataset: +def polyval( + coord: Dataset, coeffs: DataArray, degree_dim: Hashable = "degree" +) -> Dataset: ... @overload -def polyval(coord: Dataset, coeffs: Dataset, degree_dim: Hashable) -> Dataset: +def polyval( + coord: Dataset, coeffs: Dataset, degree_dim: Hashable = "degree" +) -> Dataset: ... @@ -2001,10 +2028,10 @@ def _calc_idxminmax( *, array, func: Callable, - dim: Hashable = None, - skipna: bool = None, + dim: Hashable | None = None, + skipna: bool | None = None, fill_value: Any = dtypes.NA, - keep_attrs: bool = None, + keep_attrs: bool | None = None, ): """Apply common operations for idxmin and idxmax.""" # This function doesn't make sense for scalars so don't try diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 47350b9403f..3a6b70f117a 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -2,7 +2,7 @@ import warnings from contextlib import contextmanager -from typing import TYPE_CHECKING, Any, Hashable, Iterator, Mapping, Sequence, cast +from typing import TYPE_CHECKING, Any, Hashable, Iterator, List, Mapping, Sequence import numpy as np import pandas as pd @@ -14,18 +14,27 @@ from .variable import Variable, calculate_dimensions if TYPE_CHECKING: + from .common import DataWithCoords from .dataarray import DataArray from .dataset import Dataset + from .types import T_DataArray # Used as the key corresponding to a DataArray's variable when converting # arbitrary DataArray objects to datasets _THIS_ARRAY = ReprObject("") +# TODO: Remove when min python version >= 3.9: +GenericAlias = type(List[int]) -class Coordinates(Mapping[Hashable, "DataArray"]): - __slots__ = () - def __getitem__(self, key: Hashable) -> DataArray: +class Coordinates(Mapping[Hashable, "T_DataArray"]): + _data: DataWithCoords + __slots__ = ("_data",) + + # TODO: Remove when min python version >= 3.9: + __class_getitem__ = classmethod(GenericAlias) + + def __getitem__(self, key: Hashable) -> T_DataArray: raise NotImplementedError() def __setitem__(self, key: Hashable, value: Any) -> None: @@ -45,11 +54,11 @@ def dtypes(self) -> Frozen[Hashable, np.dtype]: @property def indexes(self) -> Indexes[pd.Index]: - return self._data.indexes # type: ignore[attr-defined] + return self._data.indexes @property def xindexes(self) -> Indexes[Index]: - return self._data.xindexes # type: ignore[attr-defined] + return self._data.xindexes @property def variables(self): @@ -79,7 +88,7 @@ def __repr__(self) -> str: def to_dataset(self) -> Dataset: raise NotImplementedError() - def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index: + def to_index(self, ordered_dims: Sequence[Hashable] | None = None) -> pd.Index: """Convert all index coordinates into a :py:class:`pandas.Index`. Parameters @@ -107,11 +116,9 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index: raise ValueError("no valid index for a 0-dimensional object") elif len(ordered_dims) == 1: (dim,) = ordered_dims - return self._data.get_index(dim) # type: ignore[attr-defined] + return self._data.get_index(dim) else: - indexes = [ - self._data.get_index(k) for k in ordered_dims # type: ignore[attr-defined] - ] + indexes = [self._data.get_index(k) for k in ordered_dims] # compute the sizes of the repeat and tile for the cartesian product # (taken from pandas.core.reshape.util) @@ -238,6 +245,8 @@ class DatasetCoordinates(Coordinates): objects. 
""" + _data: Dataset + __slots__ = ("_data",) def __init__(self, dataset: Dataset): @@ -278,7 +287,7 @@ def variables(self) -> Mapping[Hashable, Variable]: def __getitem__(self, key: Hashable) -> DataArray: if key in self._data.data_vars: raise KeyError(key) - return cast("DataArray", self._data[key]) + return self._data[key] def to_dataset(self) -> Dataset: """Convert these coordinates into a new Dataset""" @@ -334,16 +343,18 @@ def _ipython_key_completions_(self): ] -class DataArrayCoordinates(Coordinates): +class DataArrayCoordinates(Coordinates["T_DataArray"]): """Dictionary like container for DataArray coordinates. Essentially a dict with keys given by the array's dimensions and the values given by corresponding DataArray objects. """ + _data: T_DataArray + __slots__ = ("_data",) - def __init__(self, dataarray: DataArray): + def __init__(self, dataarray: T_DataArray) -> None: self._data = dataarray @property @@ -366,7 +377,7 @@ def dtypes(self) -> Frozen[Hashable, np.dtype]: def _names(self) -> set[Hashable]: return set(self._data._coords) - def __getitem__(self, key: Hashable) -> DataArray: + def __getitem__(self, key: Hashable) -> T_DataArray: return self._data._getitem_coord(key) def _update_coords( @@ -452,7 +463,7 @@ def drop_coords( def assert_coordinate_consistent( - obj: DataArray | Dataset, coords: Mapping[Any, Variable] + obj: T_DataArray | Dataset, coords: Mapping[Any, Variable] ) -> None: """Make sure the dimension coordinate of obj is consistent with coords. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 15d1777b270..f939a2c8b6e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -78,6 +78,7 @@ from .rolling import DataArrayCoarsen, DataArrayRolling from .types import ( CoarsenBoundaryOptions, + DatetimeLike, DatetimeUnitOptions, Dims, ErrorOptions, @@ -377,10 +378,10 @@ def __init__( | Mapping[Any, Any] | None = None, dims: Hashable | Sequence[Hashable] | None = None, - name: Hashable = None, - attrs: Mapping = None, + name: Hashable | None = None, + attrs: Mapping | None = None, # internal parameters - indexes: dict[Hashable, Index] = None, + indexes: dict[Hashable, Index] | None = None, fastpath: bool = False, ) -> None: if fastpath: @@ -427,7 +428,7 @@ def __init__( # TODO(shoyer): document this argument, once it becomes part of the # public interface. 
- self._indexes = indexes # type: ignore[assignment] + self._indexes = indexes self._close = None @@ -452,7 +453,7 @@ def _construct_direct( def _replace( self: T_DataArray, - variable: Variable = None, + variable: Variable | None = None, coords=None, name: Hashable | None | Default = _default, indexes=None, @@ -495,9 +496,9 @@ def _replace_maybe_drop_dims( def _overwrite_indexes( self: T_DataArray, indexes: Mapping[Any, Index], - coords: Mapping[Any, Variable] = None, - drop_coords: list[Hashable] = None, - rename_dims: Mapping[Any, Any] = None, + coords: Mapping[Any, Variable] | None = None, + drop_coords: list[Hashable] | None = None, + rename_dims: Mapping[Any, Any] | None = None, ) -> T_DataArray: """Maybe replace indexes and their corresponding coordinates.""" if not indexes: @@ -1415,8 +1416,8 @@ def isel( def sel( self: T_DataArray, - indexers: Mapping[Any, Any] = None, - method: str = None, + indexers: Mapping[Any, Any] | None = None, + method: str | None = None, tolerance=None, drop: bool = False, **indexers_kwargs: Any, @@ -1953,7 +1954,7 @@ def reindex_like( def reindex( self: T_DataArray, - indexers: Mapping[Any, Any] = None, + indexers: Mapping[Any, Any] | None = None, method: ReindexMethodOptions = None, tolerance: float | Iterable[float] | None = None, copy: bool = True, @@ -2515,7 +2516,7 @@ def expand_dims( # https://github.com/python/mypy/issues/12846 is resolved def set_index( self, - indexes: Mapping[Any, Hashable | Sequence[Hashable]] = None, + indexes: Mapping[Any, Hashable | Sequence[Hashable]] | None = None, append: bool = False, **indexes_kwargs: Hashable | Sequence[Hashable], ) -> DataArray: @@ -3613,7 +3614,7 @@ def combine_first(self: T_DataArray, other: T_DataArray) -> T_DataArray: def reduce( self: T_DataArray, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -3993,8 +3994,8 @@ def to_dict(self, data: bool = True, encoding: bool = False) -> dict[str, Any]: """ d = self.variable.to_dict(data=data) d.update({"coords": {}, "name": self.name}) - for k in self.coords: - d["coords"][k] = self.coords[k].variable.to_dict(data=data) + for k, coord in self.coords.items(): + d["coords"][k] = coord.variable.to_dict(data=data) if encoding: d["encoding"] = dict(self.encoding) return d @@ -4600,7 +4601,7 @@ def imag(self: T_DataArray) -> T_DataArray: def dot( self: T_DataArray, other: T_DataArray, - dims: Dims | ellipsis = None, + dims: Dims = None, ) -> T_DataArray: """Perform dot product of two DataArrays along their shared dims. @@ -4724,7 +4725,7 @@ def quantile( method: QuantileMethods = "linear", keep_attrs: bool | None = None, skipna: bool | None = None, - interpolation: QuantileMethods = None, + interpolation: QuantileMethods | None = None, ) -> T_DataArray: """Compute the qth quantile of the data along the specified dimension. 
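To make the revised `keep_attrs` handling in `xarray.core.computation.where` (earlier in this diff) concrete, a small sketch of the intended behaviour; the attribute name is arbitrary:

    import xarray as xr

    x = xr.DataArray([1.0, 2.0], dims="p", attrs={"units": "m"})
    y = xr.DataArray([10.0, 20.0], dims="p")
    cond = xr.DataArray([True, False], dims="p")

    # with keep_attrs=True the attrs of x (the second argument) are rebuilt on
    # the result, rather than injected through a keep_attrs callable as before
    out = xr.where(cond, x, y, keep_attrs=True)
    assert out.attrs == {"units": "m"}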
@@ -5604,7 +5605,7 @@ def idxmax( # https://github.com/python/mypy/issues/12846 is resolved def argmin( self, - dim: Dims | ellipsis = None, + dim: Dims = None, axis: int | None = None, keep_attrs: bool | None = None, skipna: bool | None = None, @@ -5706,7 +5707,7 @@ def argmin( # https://github.com/python/mypy/issues/12846 is resolved def argmax( self, - dim: Dims | ellipsis = None, + dim: Dims = None, axis: int | None = None, keep_attrs: bool | None = None, skipna: bool | None = None, @@ -6531,7 +6532,9 @@ def resample( skipna: bool | None = None, closed: SideOptions | None = None, label: SideOptions | None = None, - base: int = 0, + base: int | None = None, + offset: pd.Timedelta | datetime.timedelta | str | None = None, + origin: str | DatetimeLike = "start_day", keep_attrs: bool | None = None, loffset: datetime.timedelta | str | None = None, restore_coord_dims: bool | None = None, @@ -6555,10 +6558,22 @@ def resample( Side of each interval to treat as closed. label : {"left", "right"}, optional Side of each interval to use for labeling. - base : int, default = 0 + base : int, optional For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for "24H" frequency, base could range from 0 through 23. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : pd.Timedelta, datetime.timedelta, or str, default is None + An offset timedelta added to the origin. loffset : timedelta or str, optional Offset used to adjust the resampled time labels. Some pandas date offset strings are supported. 
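The `origin`/`offset` keywords documented in the resample docstring above also apply to `CFTimeIndex`-backed data, handled by the `CFTimeGrouper` changes further down in this diff. A minimal sketch, assuming the optional `cftime` dependency is installed; calendar, frequency and values are invented:

    import numpy as np
    import xarray as xr

    times = xr.cftime_range("2000-01-01", periods=30, freq="12H", calendar="noleap")
    da = xr.DataArray(np.arange(30.0), coords={"time": times}, dims="time")

    # origin="end_day" anchors the bins at the ceiling midnight of the last
    # timestamp; for that origin, closed and label default to "right"
    totals = da.resample(time="2D", origin="end_day").sum()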
@@ -6640,6 +6655,8 @@ def resample( closed=closed, label=label, base=base, + offset=offset, + origin=origin, keep_attrs=keep_attrs, loffset=loffset, restore_coord_dims=restore_coord_dims, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index dbf5e46b2ad..4f376bdf811 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -107,6 +107,7 @@ CoarsenBoundaryOptions, CombineAttrsOptions, CompatOptions, + DatetimeLike, DatetimeUnitOptions, Dims, ErrorOptions, @@ -153,7 +154,7 @@ def _get_virtual_variable( - variables, key: Hashable, dim_sizes: Mapping = None + variables, key: Hashable, dim_sizes: Mapping | None = None ) -> tuple[Hashable, Hashable, Variable]: """Get a virtual variable (e.g., 'time.year') from a dict of xarray.Variable objects (if possible) @@ -831,7 +832,7 @@ def _dask_postcompute(self: T_Dataset, results: Iterable[Variable]) -> T_Dataset ) def _dask_postpersist( - self: T_Dataset, dsk: Mapping, *, rename: Mapping[str, str] = None + self: T_Dataset, dsk: Mapping, *, rename: Mapping[str, str] | None = None ) -> T_Dataset: from dask import is_dask_collection from dask.highlevelgraph import HighLevelGraph @@ -971,7 +972,7 @@ def _construct_direct( def _replace( self: T_Dataset, - variables: dict[Hashable, Variable] = None, + variables: dict[Hashable, Variable] | None = None, coord_names: set[Hashable] | None = None, dims: dict[Any, int] | None = None, attrs: dict[Hashable, Any] | None | Default = _default, @@ -1767,7 +1768,7 @@ def to_netcdf( format: T_NetcdfTypes | None = None, group: str | None = None, engine: T_NetcdfEngine | None = None, - encoding: Mapping[Hashable, Mapping[str, Any]] | None = None, + encoding: Mapping[Any, Mapping[str, Any]] | None = None, unlimited_dims: Iterable[Hashable] | None = None, compute: bool = True, invalid_netcdf: bool = False, @@ -1783,7 +1784,7 @@ def to_netcdf( format: T_NetcdfTypes | None = None, group: str | None = None, engine: T_NetcdfEngine | None = None, - encoding: Mapping[Hashable, Mapping[str, Any]] | None = None, + encoding: Mapping[Any, Mapping[str, Any]] | None = None, unlimited_dims: Iterable[Hashable] | None = None, compute: Literal[True] = True, invalid_netcdf: bool = False, @@ -1799,7 +1800,7 @@ def to_netcdf( format: T_NetcdfTypes | None = None, group: str | None = None, engine: T_NetcdfEngine | None = None, - encoding: Mapping[Hashable, Mapping[str, Any]] | None = None, + encoding: Mapping[Any, Mapping[str, Any]] | None = None, unlimited_dims: Iterable[Hashable] | None = None, *, compute: Literal[False], @@ -1814,7 +1815,7 @@ def to_netcdf( format: T_NetcdfTypes | None = None, group: str | None = None, engine: T_NetcdfEngine | None = None, - encoding: Mapping[Hashable, Mapping[str, Any]] | None = None, + encoding: Mapping[Any, Mapping[str, Any]] | None = None, unlimited_dims: Iterable[Hashable] | None = None, compute: bool = True, invalid_netcdf: bool = False, @@ -1930,6 +1931,7 @@ def to_zarr( region: Mapping[str, slice] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, + zarr_version: int | None = None, ) -> ZarrStore: ... @@ -1967,6 +1969,7 @@ def to_zarr( region: Mapping[str, slice] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, + zarr_version: int | None = None, ) -> ZarrStore | Delayed: """Write dataset contents to a zarr group. @@ -2017,6 +2020,9 @@ def to_zarr( metadata; if False, do not. 
The default (`consolidated=None`) means write consolidated metadata and attempt to read consolidated metadata for existing stores (falling back to non-consolidated). + + When the experimental ``zarr_version=3``, ``consolidated`` must be + either be ``None`` or ``False``. append_dim : hashable, optional If set, the dimension along which the data will be appended. All other dimensions on overridden variables must remain the same size. @@ -2048,6 +2054,10 @@ def to_zarr( storage_options : dict, optional Any additional parameters for the storage backend (ignored for local paths). + zarr_version : int or None, optional + The desired zarr spec version to target (currently 2 or 3). The + default of None will attempt to determine the zarr version from + ``store`` when possible, otherwise defaulting to 2. Returns ------- @@ -2092,6 +2102,7 @@ def to_zarr( append_dim=append_dim, region=region, safe_chunks=safe_chunks, + zarr_version=zarr_version, ) def __repr__(self) -> str: @@ -2484,8 +2495,8 @@ def _isel_fancy( def sel( self: T_Dataset, - indexers: Mapping[Any, Any] = None, - method: str = None, + indexers: Mapping[Any, Any] | None = None, + method: str | None = None, tolerance: int | float | Iterable[int | float] | None = None, drop: bool = False, **indexers_kwargs: Any, @@ -2749,7 +2760,9 @@ def thin( return self.isel(indexers_slices) def broadcast_like( - self: T_Dataset, other: Dataset | DataArray, exclude: Iterable[Hashable] = None + self: T_Dataset, + other: Dataset | DataArray, + exclude: Iterable[Hashable] | None = None, ) -> T_Dataset: """Broadcast this DataArray against another Dataset or DataArray. This is equivalent to xr.broadcast(other, self)[1] @@ -3117,8 +3130,8 @@ def reindex( def _reindex( self: T_Dataset, - indexers: Mapping[Any, Any] = None, - method: str = None, + indexers: Mapping[Any, Any] | None = None, + method: str | None = None, tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value: Any = xrdtypes.NA, @@ -3144,7 +3157,7 @@ def interp( coords: Mapping[Any, Any] | None = None, method: InterpOptions = "linear", assume_sorted: bool = False, - kwargs: Mapping[str, Any] = None, + kwargs: Mapping[str, Any] | None = None, method_non_numeric: str = "nearest", **coords_kwargs: Any, ) -> T_Dataset: @@ -3694,7 +3707,9 @@ def rename_dims( return self._replace(variables, coord_names, dims=sizes, indexes=indexes) def rename_vars( - self: T_Dataset, name_dict: Mapping[Any, Hashable] = None, **names: Hashable + self: T_Dataset, + name_dict: Mapping[Any, Hashable] | None = None, + **names: Hashable, ) -> T_Dataset: """Returns a new object with renamed variables including coordinates @@ -3732,7 +3747,7 @@ def rename_vars( return self._replace(variables, coord_names, dims=dims, indexes=indexes) def swap_dims( - self: T_Dataset, dims_dict: Mapping[Any, Hashable] = None, **dims_kwargs + self: T_Dataset, dims_dict: Mapping[Any, Hashable] | None = None, **dims_kwargs ) -> T_Dataset: """Returns a new object with swapped dimensions. 
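A sketch of the new `zarr_version` argument documented in the `to_zarr` hunks above, assuming a zarr installation that exposes the experimental v3 API; the store path is hypothetical:

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"a": ("x", np.arange(4))})

    # target the experimental zarr v3 spec explicitly; per the docstring,
    # consolidated must stay None or False for v3 stores
    ds.to_zarr("example_v3.zarr", mode="w", zarr_version=3, consolidated=False)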
@@ -5578,7 +5593,7 @@ def interpolate_na( self: T_Dataset, dim: Hashable | None = None, method: InterpOptions = "linear", - limit: int = None, + limit: int | None = None, use_coordinate: bool | Hashable = True, max_gap: ( int | float | str | pd.Timedelta | np.timedelta64 | datetime.timedelta @@ -5783,7 +5798,7 @@ def combine_first(self: T_Dataset, other: T_Dataset) -> T_Dataset: def reduce( self: T_Dataset, func: Callable, - dim: Dims | ellipsis = None, + dim: Dims = None, *, keep_attrs: bool | None = None, keepdims: bool = False, @@ -6978,9 +6993,9 @@ def quantile( dim: Dims = None, method: QuantileMethods = "linear", numeric_only: bool = False, - keep_attrs: bool = None, - skipna: bool = None, - interpolation: QuantileMethods = None, + keep_attrs: bool | None = None, + skipna: bool | None = None, + interpolation: QuantileMethods | None = None, ) -> T_Dataset: """Compute the qth quantile of the data along the specified dimension. @@ -7905,7 +7920,7 @@ def polyfit( def pad( self: T_Dataset, - pad_width: Mapping[Any, int | tuple[int, int]] = None, + pad_width: Mapping[Any, int | tuple[int, int]] | None = None, mode: PadModeOptions = "constant", stat_length: int | tuple[int, int] @@ -9114,7 +9129,9 @@ def resample( skipna: bool | None = None, closed: SideOptions | None = None, label: SideOptions | None = None, - base: int = 0, + base: int | None = None, + offset: pd.Timedelta | datetime.timedelta | str | None = None, + origin: str | DatetimeLike = "start_day", keep_attrs: bool | None = None, loffset: datetime.timedelta | str | None = None, restore_coord_dims: bool | None = None, @@ -9138,10 +9155,22 @@ def resample( Side of each interval to treat as closed. label : {"left", "right"}, optional Side of each interval to use for labeling. - base : int, default = 0 + base : int, optional For frequencies that evenly subdivide 1 day, the "origin" of the aggregated intervals. For example, for "24H" frequency, base could range from 0 through 23. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, pd.Timestamp, datetime.datetime, np.datetime64, or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : pd.Timedelta, datetime.timedelta, or str, default is None + An offset timedelta added to the origin. loffset : timedelta or str, optional Offset used to adjust the resampled time labels. Some pandas date offset strings are supported. @@ -9176,6 +9205,8 @@ def resample( closed=closed, label=label, base=base, + offset=offset, + origin=origin, keep_attrs=keep_attrs, loffset=loffset, restore_coord_dims=restore_coord_dims, diff --git a/xarray/core/extensions.py b/xarray/core/extensions.py index 84d184dcaca..d40151e48fb 100644 --- a/xarray/core/extensions.py +++ b/xarray/core/extensions.py @@ -105,6 +105,7 @@ def register_dataset_accessor(name): ... def plot(self): ... # plot this array's data on a map, e.g., using Cartopy ... pass + ... 
Back in an interactive IPython session: diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index b014cf43e83..351a4b27687 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -285,7 +285,11 @@ def inline_variable_array_repr(var, max_width): def summarize_variable( - name: Hashable, var, col_width: int, max_width: int = None, is_index: bool = False + name: Hashable, + var, + col_width: int, + max_width: int | None = None, + is_index: bool = False, ): """Summarize a variable in one line, e.g., for the Dataset.__repr__.""" variable = getattr(var, "variable", var) @@ -419,7 +423,9 @@ def inline_index_repr(index, max_width=None): return repr_ -def summarize_index(name: Hashable, index, col_width: int, max_width: int = None): +def summarize_index( + name: Hashable, index, col_width: int, max_width: int | None = None +): if max_width is None: max_width = OPTIONS["display_width"] diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 8373de6567d..37d2b5e4013 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -551,7 +551,7 @@ def map( def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -699,7 +699,7 @@ def _maybe_unstack(self, obj): def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, keep_attrs: bool | None = None, **kwargs: Any, ): @@ -757,7 +757,7 @@ def _flox_reduce( elif dim is ...: parsed_dim = tuple(self._original_obj.dims) else: - parsed_dim = tuple(dim) # type:ignore[arg-type] + parsed_dim = tuple(dim) # Do this so we raise the same error message whether flox is present or not. # Better to control it here than in flox. @@ -1183,7 +1183,7 @@ def _combine(self, applied, shortcut=False): def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -1336,7 +1336,7 @@ def _combine(self, applied): def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index a18322fe06b..a768155ba7d 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -52,7 +52,7 @@ def concat( cls: type[T_Index], indexes: Sequence[T_Index], dim: Hashable, - positions: Iterable[Iterable[int]] = None, + positions: Iterable[Iterable[int]] | None = None, ) -> T_Index: raise NotImplementedError() @@ -117,10 +117,12 @@ def __copy__(self) -> Index: def __deepcopy__(self, memo: dict[int, Any] | None = None) -> Index: return self._copy(deep=True, memo=memo) - def copy(self, deep: bool = True) -> Index: + def copy(self: T_Index, deep: bool = True) -> T_Index: return self._copy(deep=deep) - def _copy(self, deep: bool = True, memo: dict[int, Any] | None = None) -> Index: + def _copy( + self: T_Index, deep: bool = True, memo: dict[int, Any] | None = None + ) -> T_Index: cls = self.__class__ copied = cls.__new__(cls) if deep: @@ -269,6 +271,9 @@ def get_indexer_nd(index, labels, method=None, tolerance=None): return indexer +T_PandasIndex = TypeVar("T_PandasIndex", bound="PandasIndex") + + class PandasIndex(Index): """Wrap a pandas.Index as an xarray compatible index.""" @@ -368,7 +373,7 @@ def concat( cls, indexes: Sequence[PandasIndex], dim: Hashable, - positions: Iterable[Iterable[int]] = None, + positions: Iterable[Iterable[int]] | None = None, ) 
-> PandasIndex: new_pd_index = cls._concat_indexes(indexes, dim, positions) @@ -532,8 +537,11 @@ def rename(self, name_dict, dims_dict): new_dim = dims_dict.get(self.dim, self.dim) return self._replace(index, dim=new_dim) - def copy(self, deep=True): + def _copy( + self: T_PandasIndex, deep: bool = True, memo: dict[int, Any] | None = None + ) -> T_PandasIndex: if deep: + # pandas is not using the memo index = self.index.copy(deep=True) else: # index will be copied in constructor @@ -656,7 +664,7 @@ def concat( # type: ignore[override] cls, indexes: Sequence[PandasMultiIndex], dim: Hashable, - positions: Iterable[Iterable[int]] = None, + positions: Iterable[Iterable[int]] | None = None, ) -> PandasMultiIndex: new_pd_index = cls._concat_indexes(indexes, dim, positions) @@ -1265,11 +1273,19 @@ def to_pandas_indexes(self) -> Indexes[pd.Index]: return Indexes(indexes, self._variables) def copy_indexes( - self, deep: bool = True + self, deep: bool = True, memo: dict[int, Any] | None = None ) -> tuple[dict[Hashable, T_PandasOrXarrayIndex], dict[Hashable, Variable]]: """Return a new dictionary with copies of indexes, preserving unique indexes. + Parameters + ---------- + deep : bool, default: True + Whether the indexes are deep or shallow copied onto the new object. + memo : dict if object id to copied objects or None, optional + To prevent infinite recursion deepcopy stores all copied elements + in this dict. + """ new_indexes = {} new_index_vars = {} @@ -1285,7 +1301,7 @@ def copy_indexes( else: convert_new_idx = False - new_idx = idx.copy(deep=deep) + new_idx = idx._copy(deep=deep, memo=memo) idx_vars = idx.create_variables(coords) if convert_new_idx: @@ -1346,7 +1362,7 @@ def indexes_equal( other_index: Index, variable: Variable, other_variable: Variable, - cache: dict[tuple[int, int], bool | None] = None, + cache: dict[tuple[int, int], bool | None] | None = None, ) -> bool: """Check if two indexes are equal, possibly with cached results. 
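The `Index.copy`/`_copy` and `copy_indexes` changes above thread the `deep` flag and a deepcopy memo through index copies. A small sketch of the user-facing behaviour this supports; the dataset contents are illustrative only:

    import copy
    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"a": ("x", np.arange(3.0))}, coords={"x": [10, 20, 30]})

    shallow = ds.copy(deep=False)   # index variables are shallow-copied as well
    deep = copy.deepcopy(ds)        # deepcopy now routes through _copy(deep=True, memo=...)

    assert (deep["x"] == ds["x"]).all()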
diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 6b33741633d..8dd2d1a0ead 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -180,7 +180,7 @@ def map_index_queries( # forward dimension indexers with no index/coordinate results.append(IndexSelResult(labels)) else: - results.append(index.sel(labels, **options)) # type: ignore[call-arg] + results.append(index.sel(labels, **options)) merged = merge_sel_results(results) @@ -1422,7 +1422,7 @@ def __init__(self, array: pd.Index, dtype: DTypeLike = None): if dtype is None: self._dtype = get_valid_numpy_dtype(array) else: - self._dtype = np.dtype(dtype) # type: ignore[assignment] + self._dtype = np.dtype(dtype) @property def dtype(self) -> np.dtype: diff --git a/xarray/core/merge.py b/xarray/core/merge.py index c2efcc791a1..859b3aeff8f 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -40,9 +40,9 @@ ArrayLike = Any VariableLike = Union[ ArrayLike, - tuple[DimsLike, ArrayLike], - tuple[DimsLike, ArrayLike, Mapping], - tuple[DimsLike, ArrayLike, Mapping, Mapping], + Tuple[DimsLike, ArrayLike], + Tuple[DimsLike, ArrayLike, Mapping], + Tuple[DimsLike, ArrayLike, Mapping, Mapping], ] XarrayValue = Union[DataArray, Variable, VariableLike] DatasetLike = Union[Dataset, Mapping[Any, XarrayValue]] @@ -207,7 +207,7 @@ def _assert_prioritized_valid( def merge_collected( grouped: dict[Hashable, list[MergeElement]], - prioritized: Mapping[Any, MergeElement] = None, + prioritized: Mapping[Any, MergeElement] | None = None, compat: CompatOptions = "minimal", combine_attrs: CombineAttrsOptions = "override", equals: dict[Hashable, bool] | None = None, @@ -391,7 +391,7 @@ def collect_from_coordinates( def merge_coordinates_without_align( objects: list[Coordinates], - prioritized: Mapping[Any, MergeElement] = None, + prioritized: Mapping[Any, MergeElement] | None = None, exclude_dims: AbstractSet = frozenset(), combine_attrs: CombineAttrsOptions = "override", ) -> tuple[dict[Hashable, Variable], dict[Hashable, Index]]: diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 7390c8971fc..93423a4beff 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -308,12 +308,12 @@ def get_clean_interp_index( def interp_na( self, - dim: Hashable = None, + dim: Hashable | None = None, use_coordinate: bool | str = True, method: InterpOptions = "linear", - limit: int = None, + limit: int | None = None, max_gap: int | float | str | pd.Timedelta | np.timedelta64 | dt.timedelta = None, - keep_attrs: bool = None, + keep_attrs: bool | None = None, **kwargs, ): """Interpolate values according to different methods.""" diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index 50f3c474f38..0ef428e3d96 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -157,7 +157,7 @@ def map_blocks( func: Callable[..., T_Xarray], obj: DataArray | Dataset, args: Sequence[Any] = (), - kwargs: Mapping[str, Any] = None, + kwargs: Mapping[str, Any] | None = None, template: DataArray | Dataset | None = None, ) -> T_Xarray: """Apply a function to each block of a DataArray or Dataset. 
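For the `map_blocks` signature touched above (`kwargs` is now `Mapping | None`), a minimal usage sketch; it assumes dask is installed, and the function, chunk size and factor are made up for the example:

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"a": ("x", np.arange(8.0))}).chunk({"x": 4})

    def scale(block, factor=1.0):
        # applied independently to every block of the chunked Dataset
        return block * factor

    # kwargs defaults to None; extra keyword arguments go in as an explicit dict
    result = xr.map_blocks(scale, ds, kwargs={"factor": 2.0}, template=ds)
    result.compute()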
diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py index 0a6537fe909..474b694dcf0 100644 --- a/xarray/core/pycompat.py +++ b/xarray/core/pycompat.py @@ -1,7 +1,8 @@ from __future__ import annotations from importlib import import_module -from typing import Any, Literal +from types import ModuleType +from typing import TYPE_CHECKING, Any, Literal, Tuple, Type import numpy as np from packaging.version import Version @@ -10,7 +11,9 @@ integer_types = (int, np.integer) -ModType = Literal["dask", "pint", "cupy", "sparse"] +if TYPE_CHECKING: + ModType = Literal["dask", "pint", "cupy", "sparse"] + DuckArrayTypes = Tuple[Type[Any], ...] # TODO: improve this? maybe Generic class DuckArrayModule: @@ -21,12 +24,15 @@ class DuckArrayModule: https://github.com/pydata/xarray/pull/5561#discussion_r664815718 """ - module: ModType | None + module: ModuleType | None version: Version - type: tuple[type[Any]] # TODO: improve this? maybe Generic + type: DuckArrayTypes available: bool def __init__(self, mod: ModType) -> None: + duck_array_module: ModuleType | None = None + duck_array_version: Version + duck_array_type: DuckArrayTypes try: duck_array_module = import_module(mod) duck_array_version = Version(duck_array_module.__version__) @@ -53,7 +59,7 @@ def __init__(self, mod: ModType) -> None: self.available = duck_array_module is not None -def array_type(mod: ModType) -> tuple[type[Any]]: +def array_type(mod: ModType) -> DuckArrayTypes: """Quick wrapper to get the array class of the module.""" return DuckArrayModule(mod).type diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 29f84231e13..b9800a99d4a 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -47,7 +47,7 @@ def __init__( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, keep_attrs: bool | None = None, **kwargs, ) -> T_Xarray: @@ -346,7 +346,7 @@ def apply(self, func, args=(), shortcut=None, **kwargs): def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index 0c50ce5587b..9aa2d792031 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -38,21 +38,27 @@ from __future__ import annotations import datetime +import typing import numpy as np import pandas as pd from ..coding.cftime_offsets import ( - CFTIME_TICKS, + BaseCFTimeOffset, Day, MonthEnd, QuarterEnd, + Tick, YearEnd, cftime_range, normalize_date, to_offset, ) from ..coding.cftimeindex import CFTimeIndex +from .types import SideOptions + +if typing.TYPE_CHECKING: + from .types import CFTimeDatetime class CFTimeGrouper: @@ -60,25 +66,77 @@ class CFTimeGrouper: single method, the only one required for resampling in xarray. 
It cannot be used in a call to groupby like a pandas.Grouper object can.""" - def __init__(self, freq, closed=None, label=None, base=0, loffset=None): + def __init__( + self, + freq: str | BaseCFTimeOffset, + closed: SideOptions | None = None, + label: SideOptions | None = None, + base: int | None = None, + loffset: str | datetime.timedelta | BaseCFTimeOffset | None = None, + origin: str | CFTimeDatetime = "start_day", + offset: str | datetime.timedelta | None = None, + ): + self.offset: datetime.timedelta | None + self.closed: SideOptions + self.label: SideOptions + + if base is not None and offset is not None: + raise ValueError("base and offset cannot be provided at the same time") + self.freq = to_offset(freq) - self.closed = closed - self.label = label - self.base = base self.loffset = loffset + self.origin = origin if isinstance(self.freq, (MonthEnd, QuarterEnd, YearEnd)): - if self.closed is None: + if closed is None: self.closed = "right" - if self.label is None: + else: + self.closed = closed + if label is None: self.label = "right" + else: + self.label = label + else: + # The backward resample sets ``closed`` to ``'right'`` by default + # since the last value should be considered as the edge point for + # the last bin. When origin in "end" or "end_day", the value for a + # specific ``cftime.datetime`` index stands for the resample result + # from the current ``cftime.datetime`` minus ``freq`` to the current + # ``cftime.datetime`` with a right close. + if self.origin in ["end", "end_day"]: + if closed is None: + self.closed = "right" + else: + self.closed = closed + if label is None: + self.label = "right" + else: + self.label = label + else: + if closed is None: + self.closed = "left" + else: + self.closed = closed + if label is None: + self.label = "left" + else: + self.label = label + + if base is not None and isinstance(self.freq, Tick): + offset = type(self.freq)(n=base % self.freq.n).as_timedelta() + + if offset is not None: + try: + self.offset = _convert_offset_to_timedelta(offset) + except (ValueError, AttributeError) as error: + raise ValueError( + f"offset must be a datetime.timedelta object or an offset string " + f"that can be converted to a timedelta. Got {offset} instead." + ) from error else: - if self.closed is None: - self.closed = "left" - if self.label is None: - self.label = "left" + self.offset = None - def first_items(self, index): + def first_items(self, index: CFTimeIndex): """Meant to reproduce the results of the following grouper = pandas.Grouper(...) @@ -89,7 +147,7 @@ def first_items(self, index): """ datetime_bins, labels = _get_time_bins( - index, self.freq, self.closed, self.label, self.base + index, self.freq, self.closed, self.label, self.origin, self.offset ) if self.loffset is not None: labels = labels + pd.to_timedelta(self.loffset) @@ -110,7 +168,14 @@ def first_items(self, index): return first_items.where(non_duplicate), codes -def _get_time_bins(index, freq, closed, label, base): +def _get_time_bins( + index: CFTimeIndex, + freq: BaseCFTimeOffset, + closed: SideOptions, + label: SideOptions, + origin: str | CFTimeDatetime, + offset: datetime.timedelta | None, +): """Obtain the bins and their respective labels for resampling operations. Parameters @@ -121,18 +186,26 @@ def _get_time_bins(index, freq, closed, label, base): The offset object representing target conversion a.k.a. resampling frequency (e.g., 'MS', '2D', 'H', or '3T' with coding.cftime_offsets.to_offset() applied to it). 
- closed : 'left' or 'right', optional + closed : 'left' or 'right' Which side of bin interval is closed. The default is 'left' for all frequency offsets except for 'M' and 'A', which have a default of 'right'. - label : 'left' or 'right', optional + label : 'left' or 'right' Which bin edge label to label bucket with. The default is 'left' for all frequency offsets except for 'M' and 'A', which have a default of 'right'. - base : int, optional - For frequencies that evenly subdivide 1 day, the "origin" of the - aggregated intervals. For example, for '5min' frequency, base could - range from 0 through 4. Defaults to 0. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'} or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : datetime.timedelta, default is None + An offset timedelta added to the origin. Returns ------- @@ -153,7 +226,7 @@ def _get_time_bins(index, freq, closed, label, base): return datetime_bins, labels first, last = _get_range_edges( - index.min(), index.max(), freq, closed=closed, base=base + index.min(), index.max(), freq, closed=closed, origin=origin, offset=offset ) datetime_bins = labels = cftime_range( freq=freq, start=first, end=last, name=index.name @@ -171,7 +244,13 @@ def _get_time_bins(index, freq, closed, label, base): return datetime_bins, labels -def _adjust_bin_edges(datetime_bins, offset, closed, index, labels): +def _adjust_bin_edges( + datetime_bins: np.ndarray, + freq: BaseCFTimeOffset, + closed: SideOptions, + index: CFTimeIndex, + labels: np.ndarray, +): """This is required for determining the bin edges resampling with daily frequencies greater than one day, month end, and year end frequencies. @@ -206,8 +285,8 @@ def _adjust_bin_edges(datetime_bins, offset, closed, index, labels): This is also required for daily frequencies longer than one day and year-end frequencies. """ - is_super_daily = isinstance(offset, (MonthEnd, QuarterEnd, YearEnd)) or ( - isinstance(offset, Day) and offset.n > 1 + is_super_daily = isinstance(freq, (MonthEnd, QuarterEnd, YearEnd)) or ( + isinstance(freq, Day) and freq.n > 1 ) if is_super_daily: if closed == "right": @@ -219,7 +298,14 @@ def _adjust_bin_edges(datetime_bins, offset, closed, index, labels): return datetime_bins, labels -def _get_range_edges(first, last, offset, closed="left", base=0): +def _get_range_edges( + first: CFTimeDatetime, + last: CFTimeDatetime, + freq: BaseCFTimeOffset, + closed: SideOptions = "left", + origin: str | CFTimeDatetime = "start_day", + offset: datetime.timedelta | None = None, +): """Get the correct starting and ending datetimes for the resampled CFTimeIndex range. @@ -231,16 +317,24 @@ def _get_range_edges(first, last, offset, closed="left", base=0): last : cftime.datetime Uncorrected ending datetime object for resampled CFTimeIndex range. Usually the max of the original CFTimeIndex. - offset : xarray.coding.cftime_offsets.BaseCFTimeOffset + freq : xarray.coding.cftime_offsets.BaseCFTimeOffset The offset object representing target conversion a.k.a. resampling frequency. Contains information on offset type (e.g. 
Day or 'D') and offset magnitude (e.g., n = 3). - closed : 'left' or 'right', optional + closed : 'left' or 'right' Which side of bin interval is closed. Defaults to 'left'. - base : int, optional - For frequencies that evenly subdivide 1 day, the "origin" of the - aggregated intervals. For example, for '5min' frequency, base could - range from 0 through 4. Defaults to 0. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'} or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. + + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : datetime.timedelta, default is None + An offset timedelta added to the origin. Returns ------- @@ -249,21 +343,28 @@ def _get_range_edges(first, last, offset, closed="left", base=0): last : cftime.datetime Corrected ending datetime object for resampled CFTimeIndex range. """ - if isinstance(offset, CFTIME_TICKS): + if isinstance(freq, Tick): first, last = _adjust_dates_anchored( - first, last, offset, closed=closed, base=base + first, last, freq, closed=closed, origin=origin, offset=offset ) return first, last else: first = normalize_date(first) last = normalize_date(last) - first = offset.rollback(first) if closed == "left" else first - offset - last = last + offset + first = freq.rollback(first) if closed == "left" else first - freq + last = last + freq return first, last -def _adjust_dates_anchored(first, last, offset, closed="right", base=0): +def _adjust_dates_anchored( + first: CFTimeDatetime, + last: CFTimeDatetime, + freq: Tick, + closed: SideOptions = "right", + origin: str | CFTimeDatetime = "start_day", + offset: datetime.timedelta | None = None, +): """First and last offsets should be calculated from the start day to fix an error cause by resampling across multiple days when a one day period is not a multiple of the frequency. @@ -275,16 +376,24 @@ def _adjust_dates_anchored(first, last, offset, closed="right", base=0): A datetime object representing the start of a CFTimeIndex range. last : cftime.datetime A datetime object representing the end of a CFTimeIndex range. - offset : xarray.coding.cftime_offsets.BaseCFTimeOffset + freq : xarray.coding.cftime_offsets.BaseCFTimeOffset The offset object representing target conversion a.k.a. resampling frequency. Contains information on offset type (e.g. Day or 'D') and offset magnitude (e.g., n = 3). - closed : 'left' or 'right', optional + closed : 'left' or 'right' Which side of bin interval is closed. Defaults to 'right'. - base : int, optional - For frequencies that evenly subdivide 1 day, the "origin" of the - aggregated intervals. For example, for '5min' frequency, base could - range from 0 through 4. Defaults to 0. + origin : {'epoch', 'start', 'start_day', 'end', 'end_day'} or cftime.datetime, default 'start_day' + The datetime on which to adjust the grouping. The timezone of origin + must match the timezone of the index. 
+ + If a datetime is not used, these values are also supported: + - 'epoch': `origin` is 1970-01-01 + - 'start': `origin` is the first value of the timeseries + - 'start_day': `origin` is the first day at midnight of the timeseries + - 'end': `origin` is the last value of the timeseries + - 'end_day': `origin` is the ceiling midnight of the last day + offset : datetime.timedelta, default is None + An offset timedelta added to the origin. Returns ------- @@ -295,33 +404,59 @@ def _adjust_dates_anchored(first, last, offset, closed="right", base=0): A datetime object representing the end of a date range that has been adjusted to fix resampling errors. """ + import cftime + + if origin == "start_day": + origin_date = normalize_date(first) + elif origin == "start": + origin_date = first + elif origin == "epoch": + origin_date = type(first)(1970, 1, 1) + elif origin in ["end", "end_day"]: + origin_last = last if origin == "end" else _ceil_via_cftimeindex(last, "D") + sub_freq_times = (origin_last - first) // freq.as_timedelta() + if closed == "left": + sub_freq_times += 1 + first = origin_last - sub_freq_times * freq + origin_date = first + elif isinstance(origin, cftime.datetime): + origin_date = origin + else: + raise ValueError( + f"origin must be one of {{'epoch', 'start_day', 'start', 'end', 'end_day'}} " + f"or a cftime.datetime object. Got {origin}." + ) + + if offset is not None: + origin_date = origin_date + offset + + foffset = (first - origin_date) % freq.as_timedelta() + loffset = (last - origin_date) % freq.as_timedelta() - base = base % offset.n - start_day = normalize_date(first) - base_td = type(offset)(n=base).as_timedelta() - start_day += base_td - foffset = exact_cftime_datetime_difference(start_day, first) % offset.as_timedelta() - loffset = exact_cftime_datetime_difference(start_day, last) % offset.as_timedelta() if closed == "right": if foffset.total_seconds() > 0: fresult = first - foffset else: - fresult = first - offset.as_timedelta() + fresult = first - freq.as_timedelta() if loffset.total_seconds() > 0: - lresult = last + (offset.as_timedelta() - loffset) + lresult = last + (freq.as_timedelta() - loffset) else: lresult = last else: - fresult = first - foffset if foffset.total_seconds() > 0 else first + if foffset.total_seconds() > 0: + fresult = first - foffset + else: + fresult = first + if loffset.total_seconds() > 0: - lresult = last + (offset.as_timedelta() - loffset) + lresult = last + (freq.as_timedelta() - loffset) else: - lresult = last + offset.as_timedelta() + lresult = last + freq return fresult, lresult -def exact_cftime_datetime_difference(a, b): +def exact_cftime_datetime_difference(a: CFTimeDatetime, b: CFTimeDatetime): """Exact computation of b - a Assumes: @@ -359,3 +494,19 @@ def exact_cftime_datetime_difference(a, b): seconds = int(round(seconds.total_seconds())) microseconds = b.microsecond - a.microsecond return datetime.timedelta(seconds=seconds, microseconds=microseconds) + + +def _convert_offset_to_timedelta( + offset: datetime.timedelta | str | BaseCFTimeOffset, +) -> datetime.timedelta: + if isinstance(offset, datetime.timedelta): + return offset + elif isinstance(offset, (str, Tick)): + return to_offset(offset).as_timedelta() + else: + raise ValueError + + +def _ceil_via_cftimeindex(date: CFTimeDatetime, freq: str | BaseCFTimeOffset): + index = CFTimeIndex([date]) + return index.ceil(freq).item() diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 57a5456b70a..f7a573019ae 100644 --- a/xarray/core/rolling.py +++ 
b/xarray/core/rolling.py @@ -1002,7 +1002,7 @@ def _reduce_method( kwargs["skipna"] = None def wrapped_func( - self: DataArrayCoarsen, keep_attrs: bool = None, **kwargs + self: DataArrayCoarsen, keep_attrs: bool | None = None, **kwargs ) -> DataArray: from .dataarray import DataArray @@ -1033,7 +1033,9 @@ def wrapped_func( return wrapped_func - def reduce(self, func: Callable, keep_attrs: bool = None, **kwargs) -> DataArray: + def reduce( + self, func: Callable, keep_attrs: bool | None = None, **kwargs + ) -> DataArray: """Reduce the items in this group by applying `func` along some dimension(s). @@ -1088,7 +1090,7 @@ def _reduce_method( kwargs["skipna"] = None def wrapped_func( - self: DatasetCoarsen, keep_attrs: bool = None, **kwargs + self: DatasetCoarsen, keep_attrs: bool | None = None, **kwargs ) -> Dataset: from .dataset import Dataset diff --git a/xarray/core/types.py b/xarray/core/types.py index 2b65f4d23e6..adf046dabb2 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -1,20 +1,24 @@ from __future__ import annotations +import datetime from typing import ( TYPE_CHECKING, Any, Callable, Hashable, Iterable, + List, Literal, Protocol, Sequence, SupportsIndex, + Tuple, TypeVar, Union, ) import numpy as np +import pandas as pd from packaging.version import Version if TYPE_CHECKING: @@ -70,17 +74,21 @@ def dtype(self) -> np.dtype: # character codes, type strings or comma-separated fields, e.g., 'float64' str, # (flexible_dtype, itemsize) - tuple[_DTypeLikeNested, int], + Tuple[_DTypeLikeNested, int], # (fixed_dtype, shape) - tuple[_DTypeLikeNested, _ShapeLike], + Tuple[_DTypeLikeNested, _ShapeLike], # (base_dtype, new_dtype) - tuple[_DTypeLikeNested, _DTypeLikeNested], + Tuple[_DTypeLikeNested, _DTypeLikeNested], # because numpy does the same? - list[Any], + List[Any], # anything with a dtype attribute _SupportsDType, ] - + try: + from cftime import datetime as CFTimeDatetime + except ImportError: + CFTimeDatetime = Any + DatetimeLike = Union[pd.Timestamp, datetime.datetime, np.datetime64, CFTimeDatetime] else: Self: Any = None DTypeLikeSave: Any = None @@ -105,7 +113,8 @@ def dtype(self) -> np.dtype: VarCompatible = Union["Variable", "ScalarOrArray"] GroupByIncompatible = Union["Variable", "GroupBy"] -Dims = Union[str, Iterable[Hashable], None] +Dims = Union[str, Iterable[Hashable], "ellipsis", None] +OrderedDims = Union[str, Sequence[Union[Hashable, "ellipsis"]], "ellipsis", None] ErrorOptions = Literal["raise", "ignore"] ErrorOptionsWithWarn = Literal["raise", "warn", "ignore"] diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 60d2d65f068..7ecb73049d1 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -1,9 +1,44 @@ """Internal utilities; not for external use""" +# Some functions in this module are derived from functions in pandas. For +# reference, here is a copy of the pandas copyright notice: + +# BSD 3-Clause License + +# Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team +# All rights reserved. + +# Copyright (c) 2011-2022, Open source contributors. + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: + +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. 
+ +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. + +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. + +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from __future__ import annotations import contextlib import functools import importlib +import inspect import io import itertools import math @@ -22,9 +57,11 @@ Hashable, Iterable, Iterator, + Literal, Mapping, MutableMapping, MutableSet, + Sequence, TypeVar, cast, overload, @@ -34,7 +71,7 @@ import pandas as pd if TYPE_CHECKING: - from .types import ErrorOptionsWithWarn + from .types import Dims, ErrorOptionsWithWarn, OrderedDims K = TypeVar("K") V = TypeVar("V") @@ -472,7 +509,7 @@ class OrderedSet(MutableSet[T]): __slots__ = ("_d",) - def __init__(self, values: Iterable[T] = None): + def __init__(self, values: Iterable[T] | None = None): self._d = {} if values is not None: self.update(values) @@ -619,15 +656,11 @@ def read_magic_number_from_file(filename_or_obj, count=8) -> bytes: magic_number = filename_or_obj[:count] elif isinstance(filename_or_obj, io.IOBase): if filename_or_obj.tell() != 0: - raise ValueError( - "cannot guess the engine, " - "file-like object read/write pointer not at the start of the file, " - "please close and reopen, or use a context manager" - ) + filename_or_obj.seek(0) magic_number = filename_or_obj.read(count) filename_or_obj.seek(0) else: - raise TypeError(f"cannot read the magic number form {type(filename_or_obj)}") + raise TypeError(f"cannot read the magic number from {type(filename_or_obj)}") return magic_number @@ -852,15 +885,17 @@ def drop_dims_from_indexers( def drop_missing_dims( - supplied_dims: Collection, dims: Collection, missing_dims: ErrorOptionsWithWarn -) -> Collection: + supplied_dims: Iterable[Hashable], + dims: Iterable[Hashable], + missing_dims: ErrorOptionsWithWarn, +) -> Iterable[Hashable]: """Depending on the setting of missing_dims, drop any dimensions from supplied_dims that are not present in dims. Parameters ---------- - supplied_dims : dict - dims : sequence + supplied_dims : Iterable of Hashable + dims : Iterable of Hashable missing_dims : {"raise", "warn", "ignore"} """ @@ -893,6 +928,158 @@ def drop_missing_dims( ) +T_None = TypeVar("T_None", None, "ellipsis") + + +@overload +def parse_dims( + dim: str | Iterable[Hashable] | T_None, + all_dims: tuple[Hashable, ...], + *, + check_exists: bool = True, + replace_none: Literal[True] = True, +) -> tuple[Hashable, ...]: + ... 
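(Illustration, not part of the patch: a sketch of what the ``parse_dims`` and ``parse_ordered_dims`` helpers introduced in this hunk are expected to return, based on their docstrings and overloads; the concrete results shown in the comments are assumptions drawn from that documented behaviour.)

from xarray.core.utils import parse_dims, parse_ordered_dims

all_dims = ("x", "y", "z")

parse_dims("x", all_dims)  # ("x",) -- a single str becomes a 1-tuple
parse_dims(None, all_dims)  # ("x", "y", "z") since replace_none defaults to True
parse_dims(None, all_dims, replace_none=False)  # None is passed through unchanged

# In the ordered variant, an ellipsis expands to the remaining dims in order:
parse_ordered_dims(["z", ...], all_dims)  # ("z", "x", "y")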
+ + +@overload +def parse_dims( + dim: str | Iterable[Hashable] | T_None, + all_dims: tuple[Hashable, ...], + *, + check_exists: bool = True, + replace_none: Literal[False], +) -> tuple[Hashable, ...] | T_None: + ... + + +def parse_dims( + dim: Dims, + all_dims: tuple[Hashable, ...], + *, + check_exists: bool = True, + replace_none: bool = True, +) -> tuple[Hashable, ...] | None | ellipsis: + """Parse one or more dimensions. + + A single dimension must always be a str, multiple dimensions + can be Hashables. This supports e.g. using a tuple as a dimension. + If you supply e.g. a set of dimensions the order cannot be + preserved, but for sequences it will be. + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None + Dimension(s) to parse. + all_dims : tuple of Hashable + All possible dimensions. + check_exists: bool, default: True + If True, check if dim is a subset of all_dims. + replace_none : bool, default: True + If True, return all_dims if dim is None or "...". + + Returns + ------- + parsed_dims : tuple of Hashable + Input dimensions as a tuple. + """ + if dim is None or dim is ...: + if replace_none: + return all_dims + return dim + if isinstance(dim, str): + dim = (dim,) + if check_exists: + _check_dims(set(dim), set(all_dims)) + return tuple(dim) + + +@overload +def parse_ordered_dims( + dim: str | Sequence[Hashable | ellipsis] | T_None, + all_dims: tuple[Hashable, ...], + *, + check_exists: bool = True, + replace_none: Literal[True] = True, +) -> tuple[Hashable, ...]: + ... + + +@overload +def parse_ordered_dims( + dim: str | Sequence[Hashable | ellipsis] | T_None, + all_dims: tuple[Hashable, ...], + *, + check_exists: bool = True, + replace_none: Literal[False], +) -> tuple[Hashable, ...] | T_None: + ... + + +def parse_ordered_dims( + dim: OrderedDims, + all_dims: tuple[Hashable, ...], + *, + check_exists: bool = True, + replace_none: bool = True, +) -> tuple[Hashable, ...] | None | ellipsis: + """Parse one or more dimensions. + + A single dimension must always be a str, multiple dimensions + can be Hashables. This supports e.g. using a tuple as a dimension. + An ellipsis ("...") in a sequence of dimensions will be + replaced with all remaining dimensions. This only makes sense when + the input is a sequence and not e.g. a set. + + Parameters + ---------- + dim : str, Sequence of Hashable or "...", "..." or None + Dimension(s) to parse. If "..." appears in a Sequence + it always gets replaced with all remaining dims. + all_dims : tuple of Hashable + All possible dimensions. + check_exists: bool, default: True + If True, check if dim is a subset of all_dims. + replace_none : bool, default: True + If True, return all_dims if dim is None. + + Returns + ------- + parsed_dims : tuple of Hashable + Input dimensions as a tuple. + """ + if dim is not None and dim is not ... and not isinstance(dim, str) and ... in dim: + dims_set: set[Hashable | ellipsis] = set(dim) + all_dims_set = set(all_dims) + if check_exists: + _check_dims(dims_set, all_dims_set) + if len(all_dims_set) != len(all_dims): + raise ValueError("Cannot use ellipsis with repeated dims") + dims = tuple(dim) + if dims.count(...) > 1: + raise ValueError("More than one ellipsis supplied") + other_dims = tuple(d for d in all_dims if d not in dims_set) + idx = dims.index(...) + return dims[:idx] + other_dims + dims[idx + 1 :] + else: + # mypy cannot resolve that the sequence cannot contain "..." 
+ return parse_dims( # type: ignore[call-overload] + dim=dim, + all_dims=all_dims, + check_exists=check_exists, + replace_none=replace_none, + ) + + +def _check_dims(dim: set[Hashable | ellipsis], all_dims: set[Hashable]) -> None: + wrong_dims = dim - all_dims + if wrong_dims and wrong_dims != {...}: + wrong_dims_str = ", ".join(f"'{d!s}'" for d in wrong_dims) + raise ValueError( + f"Dimension(s) {wrong_dims_str} do not exist. Expected one or more of {all_dims}" + ) + + _Accessor = TypeVar("_Accessor") @@ -972,3 +1159,46 @@ def module_available(module: str) -> bool: Whether the module is installed. """ return importlib.util.find_spec(module) is not None + + +def find_stack_level(test_mode=False) -> int: + """Find the first place in the stack that is not inside xarray. + + This is unless the code emanates from a test, in which case we would prefer + to see the xarray source. + + This function is taken from pandas. + + Parameters + ---------- + test_mode : bool + Flag used for testing purposes to switch off the detection of test + directories in the stack trace. + + Returns + ------- + stacklevel : int + First level in the stack that is not part of xarray. + """ + import xarray as xr + + pkg_dir = os.path.dirname(xr.__file__) + test_dir = os.path.join(pkg_dir, "tests") + + # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow + frame = inspect.currentframe() + n = 0 + while frame: + fname = inspect.getfile(frame) + if fname.startswith(pkg_dir) and (not fname.startswith(test_dir) or test_mode): + frame = frame.f_back + n += 1 + else: + break + return n + + +def emit_user_level_warning(message, category=None): + """Emit a warning at the user level by inspecting the stack trace.""" + stacklevel = find_stack_level() + warnings.warn(message, category=category, stacklevel=stacklevel) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 1e14e8dc38e..bb988392f50 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -71,7 +71,10 @@ NON_NANOSECOND_WARNING = ( "Converting non-nanosecond precision {case} values to nanosecond precision. " "This behavior can eventually be relaxed in xarray, as it is an artifact from " - "pandas which is now beginning to support non-nanosecond precision values." + "pandas which is now beginning to support non-nanosecond precision values. " + "This warning is caused by passing non-nanosecond np.datetime64 or " + "np.timedelta64 values to the DataArray or Variable constructor; it can be " + "silenced by converting the values to nanosecond precision ahead of time." ) @@ -191,14 +194,14 @@ def _as_nanosecond_precision(data): isinstance(dtype, pd.DatetimeTZDtype) and dtype.unit != "ns" ) if non_ns_datetime64 or non_ns_datetime_tz_dtype: - warnings.warn(NON_NANOSECOND_WARNING.format(case="datetime")) + utils.emit_user_level_warning(NON_NANOSECOND_WARNING.format(case="datetime")) if isinstance(dtype, pd.DatetimeTZDtype): nanosecond_precision_dtype = pd.DatetimeTZDtype("ns", dtype.tz) else: nanosecond_precision_dtype = "datetime64[ns]" return data.astype(nanosecond_precision_dtype) elif dtype.kind == "m" and dtype != np.dtype("timedelta64[ns]"): - warnings.warn(NON_NANOSECOND_WARNING.format(case="timedelta")) + utils.emit_user_level_warning(NON_NANOSECOND_WARNING.format(case="timedelta")) return data.astype("timedelta64[ns]") else: return data @@ -1136,7 +1139,7 @@ def chunk( | tuple[tuple[int, ...], ...] 
| Mapping[Any, None | int | tuple[int, ...]] ) = {}, - name: str = None, + name: str | None = None, lock: bool = False, inline_array: bool = False, **chunks_kwargs: Any, @@ -1285,7 +1288,7 @@ def _to_dense(self): def isel( self: T_Variable, - indexers: Mapping[Any, Any] = None, + indexers: Mapping[Any, Any] | None = None, missing_dims: ErrorOptionsWithWarn = "raise", **indexers_kwargs: Any, ) -> T_Variable: @@ -1506,7 +1509,7 @@ def pad( if reflect_type is not None: pad_option_kwargs["reflect_type"] = reflect_type - array = np.pad( # type: ignore[call-overload] + array = np.pad( self.data.astype(dtype, copy=False), pad_width_by_index, mode=mode, @@ -1886,7 +1889,7 @@ def clip(self, min=None, max=None): def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, keepdims: bool = False, @@ -2119,9 +2122,9 @@ def quantile( q: ArrayLike, dim: str | Sequence[Hashable] | None = None, method: QuantileMethods = "linear", - keep_attrs: bool = None, - skipna: bool = None, - interpolation: QuantileMethods = None, + keep_attrs: bool | None = None, + skipna: bool | None = None, + interpolation: QuantileMethods | None = None, ) -> Variable: """Compute the qth quantile of the data along the specified dimension. @@ -2519,7 +2522,7 @@ def coarsen_reshape(self, windows, boundary, side): return variable.data.reshape(shape), tuple(axes) - def isnull(self, keep_attrs: bool = None): + def isnull(self, keep_attrs: bool | None = None): """Test each value in the array for whether it is a missing value. Returns @@ -2553,7 +2556,7 @@ def isnull(self, keep_attrs: bool = None): keep_attrs=keep_attrs, ) - def notnull(self, keep_attrs: bool = None): + def notnull(self, keep_attrs: bool | None = None): """Test each value in the array for whether it is not a missing value. Returns @@ -2660,7 +2663,7 @@ def _to_numeric(self, offset=None, datetime_unit=None, dtype=float): def _unravel_argminmax( self, argminmax: str, - dim: Dims | ellipsis, + dim: Dims, axis: int | None, keep_attrs: bool | None, skipna: bool | None, @@ -2729,10 +2732,10 @@ def _unravel_argminmax( def argmin( self, - dim: Dims | ellipsis = None, - axis: int = None, - keep_attrs: bool = None, - skipna: bool = None, + dim: Dims = None, + axis: int | None = None, + keep_attrs: bool | None = None, + skipna: bool | None = None, ) -> Variable | dict[Hashable, Variable]: """Index or indices of the minimum of the Variable over one or more dimensions. If a sequence is passed to 'dim', then result returned as dict of Variables, @@ -2774,10 +2777,10 @@ def argmin( def argmax( self, - dim: Dims | ellipsis = None, - axis: int = None, - keep_attrs: bool = None, - skipna: bool = None, + dim: Dims = None, + axis: int | None = None, + keep_attrs: bool | None = None, + skipna: bool | None = None, ) -> Variable | dict[Hashable, Variable]: """Index or indices of the maximum of the Variable over one or more dimensions. 
If a sequence is passed to 'dim', then result returned as dict of Variables, @@ -2996,7 +2999,7 @@ def _data_equals(self, other): def to_index_variable(self) -> IndexVariable: """Return this variable as an xarray.IndexVariable""" - return self.copy() + return self.copy(deep=False) to_coord = utils.alias(to_index_variable, "to_coord") diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index fafe8188792..0f3a9aa3432 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -207,7 +207,7 @@ def _check_dim(self, dim: Dims): def _reduce( da: DataArray, weights: DataArray, - dim: Dims | ellipsis = None, + dim: Dims = None, skipna: bool | None = None, ) -> DataArray: """reduce using dot; equivalent to (da * weights).sum(dim, skipna) @@ -312,7 +312,7 @@ def _weighted_quantile( da: DataArray, q: ArrayLike, dim: Dims = None, - skipna: bool = None, + skipna: bool | None = None, ) -> DataArray: """Apply a weighted ``quantile`` to a DataArray along some dimension(s).""" @@ -516,7 +516,7 @@ def quantile( q: ArrayLike, *, dim: Dims = None, - keep_attrs: bool = None, + keep_attrs: bool | None = None, skipna: bool = True, ) -> T_Xarray: diff --git a/xarray/plot/dataarray_plot.py b/xarray/plot/dataarray_plot.py index ac23f7dc96d..46aaf33f683 100644 --- a/xarray/plot/dataarray_plot.py +++ b/xarray/plot/dataarray_plot.py @@ -1488,7 +1488,7 @@ def newplotfunc( if ax is None: # TODO: Importing Axes3D is no longer necessary in matplotlib >= 3.2. # Remove when minimum requirement of matplotlib is 3.2: - from mpl_toolkits.mplot3d import Axes3D # type: ignore # noqa: F401 + from mpl_toolkits.mplot3d import Axes3D # noqa: F401 # delete so it does not end up in locals() del Axes3D @@ -1521,7 +1521,7 @@ def newplotfunc( and not kwargs.get("_is_facetgrid", False) and ax is not None ): - import mpl_toolkits # type: ignore + import mpl_toolkits if not isinstance(ax, mpl_toolkits.mplot3d.Axes3D): raise ValueError( diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 31daff58b55..ba5ea736bbd 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -51,8 +51,8 @@ ROBUST_PERCENTILE = 2.0 # copied from seaborn -_MARKERSIZE_RANGE = (18.0, 72.0) -_LINEWIDTH_RANGE = (1.5, 6.0) +_MARKERSIZE_RANGE = (18.0, 36.0, 72.0) +_LINEWIDTH_RANGE = (1.5, 1.5, 6.0) def import_matplotlib_pyplot(): @@ -609,8 +609,8 @@ def _resolve_intervals_1dplot( remove_drawstyle = False # Convert intervals to double points - x_is_interval = _valid_other_type(xval, [pd.Interval]) - y_is_interval = _valid_other_type(yval, [pd.Interval]) + x_is_interval = _valid_other_type(xval, pd.Interval) + y_is_interval = _valid_other_type(yval, pd.Interval) if x_is_interval and y_is_interval: raise TypeError("Can't step plot intervals against intervals.") elif x_is_interval: @@ -628,10 +628,10 @@ def _resolve_intervals_1dplot( else: # Convert intervals to mid points and adjust labels - if _valid_other_type(xval, [pd.Interval]): + if _valid_other_type(xval, pd.Interval): xval = _interval_to_mid_points(xval) x_suffix = "_center" - if _valid_other_type(yval, [pd.Interval]): + if _valid_other_type(yval, pd.Interval): yval = _interval_to_mid_points(yval) y_suffix = "_center" @@ -646,7 +646,7 @@ def _resolve_intervals_2dplot(val, func_name): increases length by 1. 
""" label_extra = "" - if _valid_other_type(val, [pd.Interval]): + if _valid_other_type(val, pd.Interval): if func_name == "pcolormesh": val = _interval_to_bound_points(val) else: @@ -656,11 +656,13 @@ def _resolve_intervals_2dplot(val, func_name): return val, label_extra -def _valid_other_type(x, types): +def _valid_other_type( + x: ArrayLike, types: type[object] | tuple[type[object], ...] +) -> bool: """ Do all elements of x have a type from types? """ - return all(any(isinstance(el, t) for t in types) for el in np.ravel(x)) + return all(isinstance(el, types) for el in np.ravel(x)) def _valid_numpy_subdtype(x, numpy_types): @@ -675,47 +677,49 @@ def _valid_numpy_subdtype(x, numpy_types): return any(np.issubdtype(x.dtype, t) for t in numpy_types) -def _ensure_plottable(*args): +def _ensure_plottable(*args) -> None: """ Raise exception if there is anything in args that can't be plotted on an axis by matplotlib. """ - numpy_types = [ + numpy_types: tuple[type[object], ...] = ( np.floating, np.integer, np.timedelta64, np.datetime64, np.bool_, np.str_, - ] - other_types = [datetime] - if cftime is not None: - cftime_datetime_types = [cftime.datetime] - other_types = other_types + cftime_datetime_types - else: - cftime_datetime_types = [] + ) + other_types: tuple[type[object], ...] = (datetime,) + cftime_datetime_types: tuple[type[object], ...] = ( + () if cftime is None else (cftime.datetime,) + ) + other_types += cftime_datetime_types + for x in args: if not ( - _valid_numpy_subdtype(np.array(x), numpy_types) - or _valid_other_type(np.array(x), other_types) + _valid_numpy_subdtype(np.asarray(x), numpy_types) + or _valid_other_type(np.asarray(x), other_types) ): raise TypeError( "Plotting requires coordinates to be numeric, boolean, " "or dates of type numpy.datetime64, " "datetime.datetime, cftime.datetime or " - f"pandas.Interval. Received data of type {np.array(x).dtype} instead." - ) - if ( - _valid_other_type(np.array(x), cftime_datetime_types) - and not nc_time_axis_available - ): - raise ImportError( - "Plotting of arrays of cftime.datetime " - "objects or arrays indexed by " - "cftime.datetime objects requires the " - "optional `nc-time-axis` (v1.2.0 or later) " - "package." + f"pandas.Interval. Received data of type {np.asarray(x).dtype} instead." ) + if _valid_other_type(np.asarray(x), cftime_datetime_types): + if nc_time_axis_available: + # Register cftime datetypes to matplotlib.units.registry, + # otherwise matplotlib will raise an error: + import nc_time_axis # noqa: F401 + else: + raise ImportError( + "Plotting of arrays of cftime.datetime " + "objects or arrays indexed by " + "cftime.datetime objects requires the " + "optional `nc-time-axis` (v1.2.0 or later) " + "package." + ) def _is_numeric(arr): @@ -1337,7 +1341,7 @@ def _parse_size( else: levels = numbers = np.sort(np.unique(flatdata)) - min_width, max_width = _MARKERSIZE_RANGE + min_width, default_width, max_width = _MARKERSIZE_RANGE # width_range = min_width, max_width if norm is None: @@ -1374,8 +1378,8 @@ class _Normalize(Sequence): ---------- data : DataArray DataArray to normalize. - width : Sequence of two numbers, optional - Normalize the data to theses min and max values. + width : Sequence of three numbers, optional + Normalize the data to these (min, default, max) values. The default is None. 
""" @@ -1384,7 +1388,7 @@ class _Normalize(Sequence): _data_unique_index: np.ndarray _data_unique_inverse: np.ndarray _data_is_numeric: bool - _width: tuple[float, float] | None + _width: tuple[float, float, float] | None __slots__ = ( "_data", @@ -1398,7 +1402,7 @@ class _Normalize(Sequence): def __init__( self, data: DataArray | None, - width: tuple[float, float] | None = None, + width: tuple[float, float, float] | None = None, _is_facetgrid: bool = False, ) -> None: self._data = data @@ -1459,14 +1463,22 @@ def _calc_widths(self, y: DataArray) -> DataArray: ... def _calc_widths(self, y: np.ndarray | DataArray) -> np.ndarray | DataArray: + """ + Normalize the values so they're inbetween self._width. + """ if self._width is None: return y - x0, x1 = self._width - - k = (y - np.min(y)) / (np.max(y) - np.min(y)) - widths = x0 + k * (x1 - x0) + xmin, xdefault, xmax = self._width + diff_maxy_miny = np.max(y) - np.min(y) + if diff_maxy_miny == 0: + # Use default with if y is constant: + widths = xdefault + 0 * y + else: + # Normalize inbetween xmin and xmax: + k = (y - np.min(y)) / diff_maxy_miny + widths = xmin + k * (xmax - xmin) return widths @overload @@ -1497,7 +1509,7 @@ def values(self) -> DataArray | None: array([3, 1, 1, 3, 5]) Dimensions without coordinates: dim_0 - >>> _Normalize(a, width=[18, 72]).values + >>> _Normalize(a, width=(18, 36, 72)).values array([45., 18., 18., 45., 72.]) Dimensions without coordinates: dim_0 @@ -1508,10 +1520,16 @@ def values(self) -> DataArray | None: array([0.5, 0. , 0. , 0.5, 2. , 3. ]) Dimensions without coordinates: dim_0 - >>> _Normalize(a, width=[18, 72]).values + >>> _Normalize(a, width=(18, 36, 72)).values array([27., 18., 18., 27., 54., 72.]) Dimensions without coordinates: dim_0 + + >>> _Normalize(a * 0, width=(18, 36, 72)).values + + array([36., 36., 36., 36., 36., 36.]) + Dimensions without coordinates: dim_0 + """ if self.data is None: return None @@ -1536,14 +1554,14 @@ def _values_unique(self) -> np.ndarray | None: >>> _Normalize(a)._values_unique array([1, 3, 5]) - >>> _Normalize(a, width=[18, 72])._values_unique + >>> _Normalize(a, width=(18, 36, 72))._values_unique array([18., 45., 72.]) >>> a = xr.DataArray([0.5, 0, 0, 0.5, 2, 3]) >>> _Normalize(a)._values_unique array([0. , 0.5, 2. , 3. 
]) - >>> _Normalize(a, width=[18, 72])._values_unique + >>> _Normalize(a, width=(18, 36, 72))._values_unique array([18., 27., 54., 72.]) """ if self.data is None: @@ -1615,7 +1633,7 @@ def format(self) -> FuncFormatter: Examples -------- >>> a = xr.DataArray([0.5, 0, 0, 0.5, 2, 3]) - >>> aa = _Normalize(a, width=[0, 1]) + >>> aa = _Normalize(a, width=(0, 0.5, 1)) >>> aa._lookup 0.000000 0.0 0.166667 0.5 @@ -1641,7 +1659,7 @@ def func(self) -> Callable[[Any, None | Any], Any]: Examples -------- >>> a = xr.DataArray([0.5, 0, 0, 0.5, 2, 3]) - >>> aa = _Normalize(a, width=[0, 1]) + >>> aa = _Normalize(a, width=(0, 0.5, 1)) >>> aa._lookup 0.000000 0.0 0.166667 0.5 @@ -1663,7 +1681,7 @@ def _determine_guide( sizeplt_norm: _Normalize, add_colorbar: None | bool = None, add_legend: None | bool = None, - plotfunc_name: str = None, + plotfunc_name: str | None = None, ) -> tuple[bool, bool]: if plotfunc_name == "hist": return False, False diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 584053d0213..6970a34b63d 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -35,6 +35,8 @@ except ImportError: pass +# https://github.com/pydata/xarray/issues/7322 +warnings.filterwarnings("ignore", "'urllib3.contrib.pyopenssl' module is deprecated") arm_xfail = pytest.mark.xfail( platform.machine() == "aarch64" or "arm" in platform.machine(), @@ -68,7 +70,6 @@ def _importorskip( has_cftime, requires_cftime = _importorskip("cftime") has_dask, requires_dask = _importorskip("dask") has_bottleneck, requires_bottleneck = _importorskip("bottleneck") -has_nc_time_axis, requires_nc_time_axis = _importorskip("nc_time_axis") has_rasterio, requires_rasterio = _importorskip("rasterio") has_zarr, requires_zarr = _importorskip("zarr") has_fsspec, requires_fsspec = _importorskip("fsspec") diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 64030b3f595..81417f8a06a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -102,6 +102,24 @@ except ImportError: pass +have_zarr_kvstore = False +try: + from zarr.storage import KVStore + + have_zarr_kvstore = True +except ImportError: + KVStore = None + +have_zarr_v3 = False +try: + # as of Zarr v2.13 these imports require environment variable + # ZARR_V3_EXPERIMENTAL_API=1 + from zarr import DirectoryStoreV3, KVStoreV3 + + have_zarr_v3 = True +except ImportError: + KVStoreV3 = None + ON_WINDOWS = sys.platform == "win32" default_value = object() dask_array_type = array_type("dask") @@ -1735,6 +1753,8 @@ def test_write_inconsistent_chunks(self) -> None: class ZarrBase(CFEncodedBase): DIMENSION_KEY = "_ARRAY_DIMENSIONS" + zarr_version = 2 + version_kwargs: dict[str, Any] = {} def create_zarr_target(self): raise NotImplementedError @@ -1742,14 +1762,18 @@ def create_zarr_target(self): @contextlib.contextmanager def create_store(self): with self.create_zarr_target() as store_target: - yield backends.ZarrStore.open_group(store_target, mode="w") + yield backends.ZarrStore.open_group( + store_target, mode="w", **self.version_kwargs + ) def save(self, dataset, store_target, **kwargs): - return dataset.to_zarr(store=store_target, **kwargs) + return dataset.to_zarr(store=store_target, **kwargs, **self.version_kwargs) @contextlib.contextmanager def open(self, store_target, **kwargs): - with xr.open_dataset(store_target, engine="zarr", **kwargs) as ds: + with xr.open_dataset( + store_target, engine="zarr", **kwargs, **self.version_kwargs + ) as ds: yield ds @contextlib.contextmanager @@ -1767,24 +1791,30 
@@ def roundtrip( @pytest.mark.parametrize("consolidated", [False, True, None]) def test_roundtrip_consolidated(self, consolidated) -> None: + if consolidated and self.zarr_version > 2: + pytest.xfail("consolidated metadata is not supported for zarr v3 yet") expected = create_test_data() with self.roundtrip( expected, - save_kwargs={"consolidated": True}, - open_kwargs={"backend_kwargs": {"consolidated": True}}, + save_kwargs={"consolidated": consolidated}, + open_kwargs={"backend_kwargs": {"consolidated": consolidated}}, ) as actual: self.check_dtypes_roundtripped(expected, actual) assert_identical(expected, actual) def test_read_non_consolidated_warning(self) -> None: + + if self.zarr_version > 2: + pytest.xfail("consolidated metadata is not supported for zarr v3 yet") + expected = create_test_data() with self.create_zarr_target() as store: - expected.to_zarr(store, consolidated=False) + expected.to_zarr(store, consolidated=False, **self.version_kwargs) with pytest.warns( RuntimeWarning, match="Failed to open Zarr store with consolidated", ): - with xr.open_zarr(store) as ds: + with xr.open_zarr(store, **self.version_kwargs) as ds: assert_identical(ds, expected) def test_non_existent_store(self) -> None: @@ -2076,10 +2106,14 @@ def test_write_persistence_modes(self, group) -> None: # check append mode for append write ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w", group=group) - ds_to_append.to_zarr(store_target, append_dim="time", group=group) + ds.to_zarr(store_target, mode="w", group=group, **self.version_kwargs) + ds_to_append.to_zarr( + store_target, append_dim="time", group=group, **self.version_kwargs + ) original = xr.concat([ds, ds_to_append], dim="time") - actual = xr.open_dataset(store_target, group=group, engine="zarr") + actual = xr.open_dataset( + store_target, group=group, engine="zarr", **self.version_kwargs + ) assert_identical(original, actual) def test_compressor_encoding(self) -> None: @@ -2119,8 +2153,8 @@ def test_append_with_mode_rplus_success(self) -> None: original = Dataset({"foo": ("x", [1])}) modified = Dataset({"foo": ("x", [2])}) with self.create_zarr_target() as store: - original.to_zarr(store) - modified.to_zarr(store, mode="r+") + original.to_zarr(store, **self.version_kwargs) + modified.to_zarr(store, mode="r+", **self.version_kwargs) with self.open(store) as actual: assert_identical(actual, modified) @@ -2128,61 +2162,69 @@ def test_append_with_mode_rplus_fails(self) -> None: original = Dataset({"foo": ("x", [1])}) modified = Dataset({"bar": ("x", [2])}) with self.create_zarr_target() as store: - original.to_zarr(store) + original.to_zarr(store, **self.version_kwargs) with pytest.raises( ValueError, match="dataset contains non-pre-existing variables" ): - modified.to_zarr(store, mode="r+") + modified.to_zarr(store, mode="r+", **self.version_kwargs) def test_append_with_invalid_dim_raises(self) -> None: ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + ds.to_zarr(store_target, mode="w", **self.version_kwargs) with pytest.raises( ValueError, match="does not match any existing dataset dimensions" ): - ds_to_append.to_zarr(store_target, append_dim="notvalid") + ds_to_append.to_zarr( + store_target, append_dim="notvalid", **self.version_kwargs + ) def test_append_with_no_dims_raises(self) -> None: with self.create_zarr_target() as store_target: - Dataset({"foo": ("x", 
[1])}).to_zarr(store_target, mode="w") + Dataset({"foo": ("x", [1])}).to_zarr( + store_target, mode="w", **self.version_kwargs + ) with pytest.raises(ValueError, match="different dimension names"): - Dataset({"foo": ("y", [2])}).to_zarr(store_target, mode="a") + Dataset({"foo": ("y", [2])}).to_zarr( + store_target, mode="a", **self.version_kwargs + ) def test_append_with_append_dim_not_set_raises(self) -> None: ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + ds.to_zarr(store_target, mode="w", **self.version_kwargs) with pytest.raises(ValueError, match="different dimension sizes"): - ds_to_append.to_zarr(store_target, mode="a") + ds_to_append.to_zarr(store_target, mode="a", **self.version_kwargs) def test_append_with_mode_not_a_raises(self) -> None: ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + ds.to_zarr(store_target, mode="w", **self.version_kwargs) with pytest.raises(ValueError, match="cannot set append_dim unless"): - ds_to_append.to_zarr(store_target, mode="w", append_dim="time") + ds_to_append.to_zarr( + store_target, mode="w", append_dim="time", **self.version_kwargs + ) def test_append_with_existing_encoding_raises(self) -> None: ds, ds_to_append, _ = create_append_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + ds.to_zarr(store_target, mode="w", **self.version_kwargs) with pytest.raises(ValueError, match="but encoding was provided"): ds_to_append.to_zarr( store_target, append_dim="time", encoding={"da": {"compressor": None}}, + **self.version_kwargs, ) @pytest.mark.parametrize("dtype", ["U", "S"]) def test_append_string_length_mismatch_raises(self, dtype) -> None: ds, ds_to_append = create_append_string_length_mismatch_test_data(dtype) with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + ds.to_zarr(store_target, mode="w", **self.version_kwargs) with pytest.raises(ValueError, match="Mismatched dtypes for variable"): ds_to_append.to_zarr( - store_target, - append_dim="time", + store_target, append_dim="time", **self.version_kwargs ) def test_check_encoding_is_consistent_after_append(self) -> None: @@ -2195,13 +2237,17 @@ def test_check_encoding_is_consistent_after_append(self) -> None: compressor = zarr.Blosc() encoding = {"da": {"compressor": compressor}} - ds.to_zarr(store_target, mode="w", encoding=encoding) - ds_to_append.to_zarr(store_target, append_dim="time") - actual_ds = xr.open_dataset(store_target, engine="zarr") + ds.to_zarr(store_target, mode="w", encoding=encoding, **self.version_kwargs) + ds_to_append.to_zarr(store_target, append_dim="time", **self.version_kwargs) + actual_ds = xr.open_dataset( + store_target, engine="zarr", **self.version_kwargs + ) actual_encoding = actual_ds["da"].encoding["compressor"] assert actual_encoding.get_config() == compressor.get_config() assert_identical( - xr.open_dataset(store_target, engine="zarr").compute(), + xr.open_dataset( + store_target, engine="zarr", **self.version_kwargs + ).compute(), xr.concat([ds, ds_to_append], dim="time"), ) @@ -2211,11 +2257,16 @@ def test_append_with_new_variable(self) -> None: # check append mode for new variable with self.create_zarr_target() as store_target: - xr.concat([ds, ds_to_append], dim="time").to_zarr(store_target, mode="w") - ds_with_new_var.to_zarr(store_target, mode="a") + xr.concat([ds, ds_to_append], dim="time").to_zarr( + 
store_target, mode="w", **self.version_kwargs + ) + ds_with_new_var.to_zarr(store_target, mode="a", **self.version_kwargs) combined = xr.concat([ds, ds_to_append], dim="time") combined["new_var"] = ds_with_new_var["new_var"] - assert_identical(combined, xr.open_dataset(store_target, engine="zarr")) + assert_identical( + combined, + xr.open_dataset(store_target, engine="zarr", **self.version_kwargs), + ) @requires_dask def test_to_zarr_compute_false_roundtrip(self) -> None: @@ -2291,12 +2342,14 @@ def test_no_warning_from_open_emptydim_with_chunks(self) -> None: with self.roundtrip(ds, open_kwargs=dict(chunks={"a": 1})) as ds_reload: assert_identical(ds, ds_reload) - @pytest.mark.parametrize("consolidated", [False, True]) + @pytest.mark.parametrize("consolidated", [False, True, None]) @pytest.mark.parametrize("compute", [False, True]) @pytest.mark.parametrize("use_dask", [False, True]) def test_write_region(self, consolidated, compute, use_dask) -> None: if (use_dask or not compute) and not has_dask: pytest.skip("requires dask") + if consolidated and self.zarr_version > 2: + pytest.xfail("consolidated metadata is not supported for zarr v3 yet") zeros = Dataset({"u": (("x",), np.zeros(10))}) nonzeros = Dataset({"u": (("x",), np.arange(1, 11))}) @@ -2311,16 +2364,24 @@ def test_write_region(self, consolidated, compute, use_dask) -> None: consolidated=consolidated, compute=compute, encoding={"u": dict(chunks=2)}, + **self.version_kwargs, ) if compute: - with xr.open_zarr(store, consolidated=consolidated) as actual: + with xr.open_zarr( + store, consolidated=consolidated, **self.version_kwargs + ) as actual: assert_identical(actual, zeros) for i in range(0, 10, 2): region = {"x": slice(i, i + 2)} nonzeros.isel(region).to_zarr( - store, region=region, consolidated=consolidated + store, + region=region, + consolidated=consolidated, + **self.version_kwargs, ) - with xr.open_zarr(store, consolidated=consolidated) as actual: + with xr.open_zarr( + store, consolidated=consolidated, **self.version_kwargs + ) as actual: assert_identical(actual, nonzeros) @pytest.mark.parametrize("mode", [None, "r+", "a"]) @@ -2328,10 +2389,12 @@ def test_write_region_mode(self, mode) -> None: zeros = Dataset({"u": (("x",), np.zeros(10))}) nonzeros = Dataset({"u": (("x",), np.arange(1, 11))}) with self.create_zarr_target() as store: - zeros.to_zarr(store) + zeros.to_zarr(store, **self.version_kwargs) for region in [{"x": slice(5)}, {"x": slice(5, 10)}]: - nonzeros.isel(region).to_zarr(store, region=region, mode=mode) - with xr.open_zarr(store) as actual: + nonzeros.isel(region).to_zarr( + store, region=region, mode=mode, **self.version_kwargs + ) + with xr.open_zarr(store, **self.version_kwargs) as actual: assert_identical(actual, nonzeros) @requires_dask @@ -2355,8 +2418,8 @@ def test_write_preexisting_override_metadata(self) -> None: ) with self.create_zarr_target() as store: - original.to_zarr(store, compute=False) - both_modified.to_zarr(store, mode="a") + original.to_zarr(store, compute=False, **self.version_kwargs) + both_modified.to_zarr(store, mode="a", **self.version_kwargs) with self.open(store) as actual: # NOTE: this arguably incorrect -- we should probably be # overriding the variable metadata, too. 
See the TODO note in @@ -2364,15 +2427,17 @@ def test_write_preexisting_override_metadata(self) -> None: assert_identical(actual, global_modified) with self.create_zarr_target() as store: - original.to_zarr(store, compute=False) - both_modified.to_zarr(store, mode="r+") + original.to_zarr(store, compute=False, **self.version_kwargs) + both_modified.to_zarr(store, mode="r+", **self.version_kwargs) with self.open(store) as actual: assert_identical(actual, only_new_data) with self.create_zarr_target() as store: - original.to_zarr(store, compute=False) + original.to_zarr(store, compute=False, **self.version_kwargs) # with region, the default mode becomes r+ - both_modified.to_zarr(store, region={"x": slice(None)}) + both_modified.to_zarr( + store, region={"x": slice(None)}, **self.version_kwargs + ) with self.open(store) as actual: assert_identical(actual, only_new_data) @@ -2383,7 +2448,7 @@ def test_write_region_errors(self) -> None: @contextlib.contextmanager def setup_and_verify_store(expected=data): with self.create_zarr_target() as store: - data.to_zarr(store) + data.to_zarr(store, **self.version_kwargs) yield store with self.open(store) as actual: assert_identical(actual, expected) @@ -2391,7 +2456,7 @@ def setup_and_verify_store(expected=data): # verify the base case works expected = Dataset({"u": (("x",), np.array([10, 11, 2, 3, 4]))}) with setup_and_verify_store(expected) as store: - data2.to_zarr(store, region={"x": slice(2)}) + data2.to_zarr(store, region={"x": slice(2)}, **self.version_kwargs) with setup_and_verify_store() as store: with pytest.raises( @@ -2400,46 +2465,57 @@ def setup_and_verify_store(expected=data): "cannot set region unless mode='a', mode='r+' or mode=None" ), ): - data.to_zarr(store, region={"x": slice(None)}, mode="w") + data.to_zarr( + store, region={"x": slice(None)}, mode="w", **self.version_kwargs + ) with setup_and_verify_store() as store: with pytest.raises(TypeError, match=r"must be a dict"): - data.to_zarr(store, region=slice(None)) # type: ignore[call-overload] + data.to_zarr(store, region=slice(None), **self.version_kwargs) # type: ignore[call-overload] with setup_and_verify_store() as store: with pytest.raises(TypeError, match=r"must be slice objects"): - data2.to_zarr(store, region={"x": [0, 1]}) # type: ignore[dict-item] + data2.to_zarr(store, region={"x": [0, 1]}, **self.version_kwargs) # type: ignore[dict-item] with setup_and_verify_store() as store: with pytest.raises(ValueError, match=r"step on all slices"): - data2.to_zarr(store, region={"x": slice(None, None, 2)}) + data2.to_zarr( + store, region={"x": slice(None, None, 2)}, **self.version_kwargs + ) with setup_and_verify_store() as store: with pytest.raises( ValueError, match=r"all keys in ``region`` are not in Dataset dimensions", ): - data.to_zarr(store, region={"y": slice(None)}) + data.to_zarr(store, region={"y": slice(None)}, **self.version_kwargs) with setup_and_verify_store() as store: with pytest.raises( ValueError, match=r"all variables in the dataset to write must have at least one dimension in common", ): - data2.assign(v=2).to_zarr(store, region={"x": slice(2)}) + data2.assign(v=2).to_zarr( + store, region={"x": slice(2)}, **self.version_kwargs + ) with setup_and_verify_store() as store: with pytest.raises( ValueError, match=r"cannot list the same dimension in both" ): - data.to_zarr(store, region={"x": slice(None)}, append_dim="x") + data.to_zarr( + store, + region={"x": slice(None)}, + append_dim="x", + **self.version_kwargs, + ) with setup_and_verify_store() as store: with 
pytest.raises( ValueError, match=r"variable 'u' already exists with different dimension sizes", ): - data2.to_zarr(store, region={"x": slice(3)}) + data2.to_zarr(store, region={"x": slice(3)}, **self.version_kwargs) @requires_dask def test_encoding_chunksizes(self) -> None: @@ -2481,10 +2557,10 @@ def test_chunk_encoding_with_larger_dask_chunks(self) -> None: def test_open_zarr_use_cftime(self) -> None: ds = create_test_data() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target) - ds_a = xr.open_zarr(store_target) + ds.to_zarr(store_target, **self.version_kwargs) + ds_a = xr.open_zarr(store_target, **self.version_kwargs) assert_identical(ds, ds_a) - ds_b = xr.open_zarr(store_target, use_cftime=True) + ds_b = xr.open_zarr(store_target, use_cftime=True, **self.version_kwargs) assert xr.coding.times.contains_cftime_datetimes(ds_b.time) def test_write_read_select_write(self) -> None: @@ -2493,13 +2569,13 @@ def test_write_read_select_write(self) -> None: # NOTE: using self.roundtrip, which uses open_dataset, will not trigger the bug. with self.create_zarr_target() as initial_store: - ds.to_zarr(initial_store, mode="w") - ds1 = xr.open_zarr(initial_store) + ds.to_zarr(initial_store, mode="w", **self.version_kwargs) + ds1 = xr.open_zarr(initial_store, **self.version_kwargs) # Combination of where+squeeze triggers error on write. ds_sel = ds1.where(ds1.coords["dim3"] == "a", drop=True).squeeze("dim3") with self.create_zarr_target() as final_store: - ds_sel.to_zarr(final_store, mode="w") + ds_sel.to_zarr(final_store, mode="w", **self.version_kwargs) @pytest.mark.parametrize("obj", [Dataset(), DataArray(name="foo")]) def test_attributes(self, obj) -> None: @@ -2508,21 +2584,24 @@ def test_attributes(self, obj) -> None: obj.attrs["good"] = {"key": "value"} ds = obj if isinstance(obj, Dataset) else obj.to_dataset() with self.create_zarr_target() as store_target: - ds.to_zarr(store_target) - assert_identical(ds, xr.open_zarr(store_target)) + ds.to_zarr(store_target, **self.version_kwargs) + assert_identical(ds, xr.open_zarr(store_target, **self.version_kwargs)) obj.attrs["bad"] = DataArray() ds = obj if isinstance(obj, Dataset) else obj.to_dataset() with self.create_zarr_target() as store_target: with pytest.raises(TypeError, match=r"Invalid attribute in Dataset.attrs."): - ds.to_zarr(store_target) + ds.to_zarr(store_target, **self.version_kwargs) @requires_zarr class TestZarrDictStore(ZarrBase): @contextlib.contextmanager def create_zarr_target(self): - yield {} + if have_zarr_kvstore: + yield KVStore({}) + else: + yield {} @requires_zarr @@ -2532,6 +2611,55 @@ def create_zarr_target(self): with create_tmp_file(suffix=".zarr") as tmp: yield tmp + @contextlib.contextmanager + def create_store(self): + with self.create_zarr_target() as store_target: + group = backends.ZarrStore.open_group(store_target, mode="w") + # older Zarr versions do not have the _store_version attribute + if have_zarr_v3: + # verify that a v2 store was created + assert group.zarr_group.store._store_version == 2 + yield group + + +class ZarrBaseV3(ZarrBase): + zarr_version = 3 + + def test_roundtrip_coordinates_with_space(self): + original = Dataset(coords={"x": 0, "y z": 1}) + with pytest.warns(SerializationWarning): + # v3 stores do not allow spaces in the key name + with pytest.raises(ValueError): + with self.roundtrip(original): + pass + + +@pytest.mark.skipif(not have_zarr_v3, reason="requires zarr version 3") +class TestZarrKVStoreV3(ZarrBaseV3): + @contextlib.contextmanager + def 
create_zarr_target(self): + yield KVStoreV3({}) + + +@pytest.mark.skipif(not have_zarr_v3, reason="requires zarr version 3") +class TestZarrDirectoryStoreV3(ZarrBaseV3): + @contextlib.contextmanager + def create_zarr_target(self): + with create_tmp_file(suffix=".zr3") as tmp: + yield DirectoryStoreV3(tmp) + + +@pytest.mark.skipif(not have_zarr_v3, reason="requires zarr version 3") +class TestZarrDirectoryStoreV3FromPath(TestZarrDirectoryStoreV3): + # Must specify zarr_version=3 to get a v3 store because create_zarr_target + # is a string path. + version_kwargs = {"zarr_version": 3} + + @contextlib.contextmanager + def create_zarr_target(self): + with create_tmp_file(suffix=".zr3") as tmp: + yield tmp + @requires_zarr @requires_fsspec @@ -3025,19 +3153,18 @@ def test_open_badbytes(self) -> None: with pytest.raises( ValueError, match=r"not the signature of a valid netCDF4 file" ): - with open_dataset(BytesIO(b"garbage"), engine="h5netcdf"): # type: ignore[arg-type] + with open_dataset(BytesIO(b"garbage"), engine="h5netcdf"): pass def test_open_twice(self) -> None: expected = create_test_data() expected.attrs["foo"] = "bar" - with pytest.raises(ValueError, match=r"read/write pointer not at the start"): - with create_tmp_file() as tmp_file: - expected.to_netcdf(tmp_file, engine="h5netcdf") - with open(tmp_file, "rb") as f: + with create_tmp_file() as tmp_file: + expected.to_netcdf(tmp_file, engine="h5netcdf") + with open(tmp_file, "rb") as f: + with open_dataset(f, engine="h5netcdf"): with open_dataset(f, engine="h5netcdf"): - with open_dataset(f, engine="h5netcdf"): - pass + pass @requires_scipy def test_open_fileobj(self) -> None: @@ -3069,15 +3196,7 @@ def test_open_fileobj(self) -> None: # `raises_regex`?). Ref https://github.com/pydata/xarray/pull/5191 with open(tmp_file, "rb") as f: f.seek(8) - with pytest.raises( - ValueError, - match="match in any of xarray's currently installed IO", - ): - with pytest.warns( - RuntimeWarning, - match=re.escape("'h5netcdf' fails while guessing"), - ): - open_dataset(f) + open_dataset(f) @requires_h5netcdf @@ -4949,6 +5068,12 @@ def test_extract_nc4_variable_encoding(self) -> None: encoding = _extract_nc4_variable_encoding(var, unlimited_dims=("x",)) assert {} == encoding + @requires_netCDF4 + def test_extract_nc4_variable_encoding_netcdf4(self, monkeypatch): + # New netCDF4 1.6.0 compression argument. 
+ var = xr.Variable(("x",), [1, 2, 3], {}, {"compression": "szlib"}) + _extract_nc4_variable_encoding(var, backend="netCDF4", raise_on_invalid=True) + def test_extract_h5nc_encoding(self) -> None: # not supported with h5netcdf (yet) var = xr.Variable(("x",), [1, 2, 3], {}, {"least_sigificant_digit": 2}) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 075393e84e7..d28f4594559 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1385,3 +1385,9 @@ def test_date_range_like_errors(): match="'source' must be a 1D array of datetime objects for inferring its range.", ): date_range_like(da, "noleap") + + +def as_timedelta_not_implemented_error(): + tick = Tick() + with pytest.raises(NotImplementedError): + tick.as_timedelta() diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index 35447a39f3c..e780421e09e 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -9,7 +9,7 @@ import xarray as xr from xarray.core.resample_cftime import CFTimeGrouper -pytest.importorskip("cftime") +cftime = pytest.importorskip("cftime") # Create a list of pairs of similar-length initial and resample frequencies @@ -50,7 +50,63 @@ ] -def da(index): +def compare_against_pandas( + da_datetimeindex, + da_cftimeindex, + freq, + closed=None, + label=None, + base=None, + offset=None, + origin=None, + loffset=None, +) -> None: + if isinstance(origin, tuple): + origin_pandas = pd.Timestamp(datetime.datetime(*origin)) + origin_cftime = cftime.DatetimeGregorian(*origin) + else: + origin_pandas = origin + origin_cftime = origin + + try: + result_datetimeindex = da_datetimeindex.resample( + time=freq, + closed=closed, + label=label, + base=base, + loffset=loffset, + offset=offset, + origin=origin_pandas, + ).mean() + except ValueError: + with pytest.raises(ValueError): + da_cftimeindex.resample( + time=freq, + closed=closed, + label=label, + base=base, + loffset=loffset, + origin=origin_cftime, + offset=offset, + ).mean() + else: + result_cftimeindex = da_cftimeindex.resample( + time=freq, + closed=closed, + label=label, + base=base, + loffset=loffset, + origin=origin_cftime, + offset=offset, + ).mean() + # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass + result_cftimeindex["time"] = ( + result_cftimeindex.xindexes["time"].to_pandas_index().to_datetimeindex() + ) + xr.testing.assert_identical(result_cftimeindex, result_datetimeindex) + + +def da(index) -> xr.DataArray: return xr.DataArray( np.arange(100.0, 100.0 + index.size), coords=[index], dims=["time"] ) @@ -59,53 +115,31 @@ def da(index): @pytest.mark.parametrize("freqs", FREQS, ids=lambda x: "{}->{}".format(*x)) @pytest.mark.parametrize("closed", [None, "left", "right"]) @pytest.mark.parametrize("label", [None, "left", "right"]) -@pytest.mark.parametrize("base", [24, 31]) -def test_resample(freqs, closed, label, base) -> None: +@pytest.mark.parametrize( + ("base", "offset"), [(24, None), (31, None), (None, "5S")], ids=lambda x: f"{x}" +) +def test_resample(freqs, closed, label, base, offset) -> None: initial_freq, resample_freq = freqs start = "2000-01-01T12:07:01" + loffset = "12H" + origin = "start" index_kwargs = dict(start=start, periods=5, freq=initial_freq) datetime_index = pd.date_range(**index_kwargs) cftime_index = xr.cftime_range(**index_kwargs) + da_datetimeindex = da(datetime_index) + da_cftimeindex = da(cftime_index) - loffset = "12H" - try: - 
da_datetime = ( - da(datetime_index) - .resample( - time=resample_freq, - closed=closed, - label=label, - base=base, - loffset=loffset, - ) - .mean() - ) - except ValueError: - with pytest.raises(ValueError): - da(cftime_index).resample( - time=resample_freq, - closed=closed, - label=label, - base=base, - loffset=loffset, - ).mean() - else: - da_cftime = ( - da(cftime_index) - .resample( - time=resample_freq, - closed=closed, - label=label, - base=base, - loffset=loffset, - ) - .mean() - ) - # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass - da_cftime["time"] = ( - da_cftime.xindexes["time"].to_pandas_index().to_datetimeindex() - ) - xr.testing.assert_identical(da_cftime, da_datetime) + compare_against_pandas( + da_datetimeindex, + da_cftimeindex, + resample_freq, + closed=closed, + label=label, + base=base, + offset=offset, + origin=origin, + loffset=loffset, + ) @pytest.mark.parametrize( @@ -153,3 +187,54 @@ def test_calendars(calendar) -> None: # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass da_cftime["time"] = da_cftime.xindexes["time"].to_pandas_index().to_datetimeindex() xr.testing.assert_identical(da_cftime, da_datetime) + + +@pytest.mark.parametrize("closed", ["left", "right"]) +@pytest.mark.parametrize( + "origin", + ["start_day", "start", "end", "end_day", "epoch", (1970, 1, 1, 3, 2)], + ids=lambda x: f"{x}", +) +def test_origin(closed, origin) -> None: + initial_freq, resample_freq = ("3H", "9H") + start = "1969-12-31T12:07:01" + index_kwargs = dict(start=start, periods=12, freq=initial_freq) + datetime_index = pd.date_range(**index_kwargs) + cftime_index = xr.cftime_range(**index_kwargs) + da_datetimeindex = da(datetime_index) + da_cftimeindex = da(cftime_index) + + compare_against_pandas( + da_datetimeindex, + da_cftimeindex, + resample_freq, + closed=closed, + origin=origin, + ) + + +def test_base_and_offset_error(): + cftime_index = xr.cftime_range("2000", periods=5) + da_cftime = da(cftime_index) + with pytest.raises(ValueError, match="base and offset cannot"): + da_cftime.resample(time="2D", base=3, offset="5S") + + +@pytest.mark.parametrize("offset", ["foo", "5MS", 10]) +def test_invalid_offset_error(offset) -> None: + cftime_index = xr.cftime_range("2000", periods=5) + da_cftime = da(cftime_index) + with pytest.raises(ValueError, match="offset must be"): + da_cftime.resample(time="2D", offset=offset) + + +def test_timedelta_offset() -> None: + timedelta = datetime.timedelta(seconds=5) + string = "5S" + + cftime_index = xr.cftime_range("2000", periods=5) + da_cftime = da(cftime_index) + + timedelta_result = da_cftime.resample(time="2D", offset=timedelta).mean() + string_result = da_cftime.resample(time="2D", offset=string).mean() + xr.testing.assert_identical(timedelta_result, string_result) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index a73e5472893..ca3b93728aa 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -614,7 +614,7 @@ def test_cf_timedelta_2d() -> None: actual = coding.times.decode_cf_timedelta(numbers, units) assert_array_equal(expected, actual) - assert expected.dtype == actual.dtype # type: ignore + assert expected.dtype == actual.dtype @pytest.mark.parametrize( @@ -651,7 +651,7 @@ def test_format_cftime_datetime(date_args, expected) -> None: def test_decode_cf(calendar) -> None: days = [1.0, 2.0, 3.0] # TODO: GH5690 — do we want to allow this type for `coords`? 
- da = DataArray(days, coords=[days], dims=["time"], name="test") # type: ignore + da = DataArray(days, coords=[days], dims=["time"], name="test") ds = da.to_dataset() for v in ["test", "time"]: diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index da1bd014064..73889c362fe 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1925,16 +1925,63 @@ def test_where() -> None: def test_where_attrs() -> None: - cond = xr.DataArray([True, False], dims="x", attrs={"attr": "cond"}) - x = xr.DataArray([1, 1], dims="x", attrs={"attr": "x"}) - y = xr.DataArray([0, 0], dims="x", attrs={"attr": "y"}) + cond = xr.DataArray([True, False], coords={"a": [0, 1]}, attrs={"attr": "cond_da"}) + cond["a"].attrs = {"attr": "cond_coord"} + x = xr.DataArray([1, 1], coords={"a": [0, 1]}, attrs={"attr": "x_da"}) + x["a"].attrs = {"attr": "x_coord"} + y = xr.DataArray([0, 0], coords={"a": [0, 1]}, attrs={"attr": "y_da"}) + y["a"].attrs = {"attr": "y_coord"} + + # 3 DataArrays, takes attrs from x actual = xr.where(cond, x, y, keep_attrs=True) - expected = xr.DataArray([1, 0], dims="x", attrs={"attr": "x"}) + expected = xr.DataArray([1, 0], coords={"a": [0, 1]}, attrs={"attr": "x_da"}) + expected["a"].attrs = {"attr": "x_coord"} assert_identical(expected, actual) - # ensure keep_attrs can handle scalar values + # x as a scalar, takes no attrs + actual = xr.where(cond, 0, y, keep_attrs=True) + expected = xr.DataArray([0, 0], coords={"a": [0, 1]}) + assert_identical(expected, actual) + + # y as a scalar, takes attrs from x + actual = xr.where(cond, x, 0, keep_attrs=True) + expected = xr.DataArray([1, 0], coords={"a": [0, 1]}, attrs={"attr": "x_da"}) + expected["a"].attrs = {"attr": "x_coord"} + assert_identical(expected, actual) + + # x and y as a scalar, takes no attrs actual = xr.where(cond, 1, 0, keep_attrs=True) - assert actual.attrs == {} + expected = xr.DataArray([1, 0], coords={"a": [0, 1]}) + assert_identical(expected, actual) + + # cond and y as a scalar, takes attrs from x + actual = xr.where(True, x, y, keep_attrs=True) + expected = xr.DataArray([1, 1], coords={"a": [0, 1]}, attrs={"attr": "x_da"}) + expected["a"].attrs = {"attr": "x_coord"} + assert_identical(expected, actual) + + # DataArray and 2 Datasets, takes attrs from x + ds_x = xr.Dataset(data_vars={"x": x}, attrs={"attr": "x_ds"}) + ds_y = xr.Dataset(data_vars={"x": y}, attrs={"attr": "y_ds"}) + ds_actual = xr.where(cond, ds_x, ds_y, keep_attrs=True) + ds_expected = xr.Dataset( + data_vars={ + "x": xr.DataArray([1, 0], coords={"a": [0, 1]}, attrs={"attr": "x_da"}) + }, + attrs={"attr": "x_ds"}, + ) + ds_expected["a"].attrs = {"attr": "x_coord"} + assert_identical(ds_expected, ds_actual) + + # 2 DataArrays and 1 Dataset, takes attrs from x + ds_actual = xr.where(cond, x.rename("x"), ds_y, keep_attrs=True) + ds_expected = xr.Dataset( + data_vars={ + "x": xr.DataArray([1, 0], coords={"a": [0, 1]}, attrs={"attr": "x_da"}) + }, + ) + ds_expected["a"].attrs = {"attr": "x_coord"} + assert_identical(ds_expected, ds_actual) @pytest.mark.parametrize( diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index ac6049872b8..8184fe1955c 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -512,7 +512,7 @@ def test_equals_and_identical(self) -> None: def test_equals_failures(self) -> None: orig = DataArray(np.arange(5.0), {"a": 42}, dims="x") - assert not orig.equals(np.arange(5)) # type: ignore + assert not orig.equals(np.arange(5)) # type: 
ignore[arg-type] assert not orig.identical(123) # type: ignore assert not orig.broadcast_equals({1: 2}) # type: ignore @@ -2754,9 +2754,9 @@ def test_quantile_method(self, method) -> None: actual = DataArray(self.va).quantile(q, method=method) if Version(np.__version__) >= Version("1.22.0"): - expected = np.nanquantile(self.dv.values, np.array(q), method=method) # type: ignore[call-arg] + expected = np.nanquantile(self.dv.values, np.array(q), method=method) else: - expected = np.nanquantile(self.dv.values, np.array(q), interpolation=method) # type: ignore[call-arg] + expected = np.nanquantile(self.dv.values, np.array(q), interpolation=method) np.testing.assert_allclose(actual.values, expected) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 23ea705db71..8f3eb728f01 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6230,7 +6230,7 @@ def test_query(self, backend, engine, parser) -> None: with pytest.raises(ValueError): ds.query("a > 5") # type: ignore # must be dict or kwargs with pytest.raises(ValueError): - ds.query(x=(a > 5)) # type: ignore # must be query string + ds.query(x=(a > 5)) with pytest.raises(IndexError): ds.query(y="a > 5") # wrong length dimension with pytest.raises(IndexError): diff --git a/xarray/tests/test_distributed.py b/xarray/tests/test_distributed.py index 780417c488b..2dd589fd872 100644 --- a/xarray/tests/test_distributed.py +++ b/xarray/tests/test_distributed.py @@ -189,7 +189,7 @@ def test_dask_distributed_zarr_integration_test( write_kwargs: dict[str, Any] = {"consolidated": True} read_kwargs: dict[str, Any] = {"backend_kwargs": {"consolidated": True}} else: - write_kwargs = read_kwargs = {} # type: ignore + write_kwargs = read_kwargs = {} chunks = {"dim1": 4, "dim2": 3, "dim3": 5} with cluster() as (s, [a, b]): with Client(s["address"], loop=loop): diff --git a/xarray/tests/test_extensions.py b/xarray/tests/test_extensions.py index 6f91cdf661e..d4414b1f53a 100644 --- a/xarray/tests/test_extensions.py +++ b/xarray/tests/test_extensions.py @@ -37,7 +37,6 @@ def foo(self): da = xr.DataArray(0) assert da.demo.foo == "bar" - # accessor is cached assert ds.demo is ds.demo @@ -45,7 +44,7 @@ def foo(self): assert ds.demo.__doc__ == "Demo accessor." # TODO: typing doesn't seem to work with accessors assert xr.Dataset.demo.__doc__ == "Demo accessor." # type: ignore - assert isinstance(ds.demo, DemoAccessor) # type: ignore + assert isinstance(ds.demo, DemoAccessor) assert xr.Dataset.demo is DemoAccessor # type: ignore # ensure we can remove it diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 814111f3afd..293a758b629 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -236,13 +236,13 @@ def test_da_groupby_quantile() -> None: dims=("x", "y"), ) - actual_x = array.groupby("x").quantile(0, dim=...) # type: ignore[arg-type] # https://github.com/python/mypy/issues/7818 + actual_x = array.groupby("x").quantile(0, dim=...) expected_x = xr.DataArray( data=[1, 4], coords={"x": [1, 2], "quantile": 0}, dims="x" ) assert_identical(expected_x, actual_x) - actual_y = array.groupby("y").quantile(0, dim=...) # type: ignore[arg-type] # https://github.com/python/mypy/issues/7818 + actual_y = array.groupby("y").quantile(0, dim=...) expected_y = xr.DataArray( data=[1, 22], coords={"y": [0, 1], "quantile": 0}, dims="y" ) @@ -273,7 +273,7 @@ def test_da_groupby_quantile() -> None: ) g = foo.groupby(foo.time.dt.month) - actual = g.quantile(0, dim=...) 
# type: ignore[arg-type] # https://github.com/python/mypy/issues/7818 + actual = g.quantile(0, dim=...) expected = xr.DataArray( data=[ 0.0, @@ -357,11 +357,11 @@ def test_ds_groupby_quantile() -> None: coords={"x": [1, 1, 1, 2, 2], "y": [0, 0, 1]}, ) - actual_x = ds.groupby("x").quantile(0, dim=...) # type: ignore[arg-type] # https://github.com/python/mypy/issues/7818 + actual_x = ds.groupby("x").quantile(0, dim=...) expected_x = xr.Dataset({"a": ("x", [1, 4])}, coords={"x": [1, 2], "quantile": 0}) assert_identical(expected_x, actual_x) - actual_y = ds.groupby("y").quantile(0, dim=...) # type: ignore[arg-type] # https://github.com/python/mypy/issues/7818 + actual_y = ds.groupby("y").quantile(0, dim=...) expected_y = xr.Dataset({"a": ("y", [1, 22])}, coords={"y": [0, 1], "quantile": 0}) assert_identical(expected_y, actual_y) @@ -387,7 +387,7 @@ def test_ds_groupby_quantile() -> None: ) g = foo.groupby(foo.time.dt.month) - actual = g.quantile(0, dim=...) # type: ignore[arg-type] # https://github.com/python/mypy/issues/7818 + actual = g.quantile(0, dim=...) expected = xr.Dataset( { "a": ( @@ -1864,6 +1864,33 @@ def test_upsample_interpolate_dask(self, chunked_time): # done here due to floating point arithmetic assert_allclose(expected, actual, rtol=1e-16) + def test_resample_base(self) -> None: + times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10) + array = DataArray(np.arange(10), [("time", times)]) + + base = 11 + actual = array.resample(time="24H", base=base).mean() + expected = DataArray(array.to_series().resample("24H", base=base).mean()) + assert_identical(expected, actual) + + def test_resample_offset(self) -> None: + times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10) + array = DataArray(np.arange(10), [("time", times)]) + + offset = pd.Timedelta("11H") + actual = array.resample(time="24H", offset=offset).mean() + expected = DataArray(array.to_series().resample("24H", offset=offset).mean()) + assert_identical(expected, actual) + + def test_resample_origin(self) -> None: + times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10) + array = DataArray(np.arange(10), [("time", times)]) + + origin = "start" + actual = array.resample(time="24H", origin=origin).mean() + expected = DataArray(array.to_series().resample("24H", origin=origin).mean()) + assert_identical(expected, actual) + class TestDatasetResample: def test_resample_and_first(self): diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 01f616f92ba..d4a707b4e23 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -14,6 +14,7 @@ import xarray as xr import xarray.plot as xplt from xarray import DataArray, Dataset +from xarray.core.utils import module_available from xarray.plot.dataarray_plot import _infer_interval_breaks from xarray.plot.dataset_plot import _infer_meta_data from xarray.plot.utils import ( @@ -29,19 +30,20 @@ from . 
import ( assert_array_equal, assert_equal, - has_nc_time_axis, requires_cartopy, requires_cftime, requires_matplotlib, - requires_nc_time_axis, requires_seaborn, ) +# this should not be imported to test if the automatic lazy import works +has_nc_time_axis = module_available("nc_time_axis") + # import mpl and change the backend before other mpl imports try: import matplotlib as mpl import matplotlib.pyplot as plt - import mpl_toolkits # type: ignore + import mpl_toolkits except ImportError: pass @@ -2823,8 +2825,8 @@ def test_datetime_plot2d(self) -> None: @pytest.mark.filterwarnings("ignore:setting an array element with a sequence") -@requires_nc_time_axis @requires_cftime +@pytest.mark.skipif(not has_nc_time_axis, reason="nc_time_axis is not installed") class TestCFDatetimePlot(PlotTestCase): @pytest.fixture(autouse=True) def setUp(self) -> None: @@ -3206,7 +3208,7 @@ def test_plot_empty_raises(val: list | float, method: str) -> None: @requires_matplotlib -def test_facetgrid_axes_raises_deprecation_warning(): +def test_facetgrid_axes_raises_deprecation_warning() -> None: with pytest.warns( DeprecationWarning, match=( diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py index d44973e25e4..8029eb3f228 100644 --- a/xarray/tests/test_plugins.py +++ b/xarray/tests/test_plugins.py @@ -203,6 +203,7 @@ def test_lazy_import() -> None: "scipy", "zarr", "matplotlib", + "nc_time_axis", "flox", # "dask", # TODO: backends.locks is not lazy yet :( "dask.array", diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index c4b6294603f..a1a9a41d782 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Hashable +from typing import Hashable, Iterable, Sequence import numpy as np import pandas as pd @@ -253,6 +253,90 @@ def test_infix_dims_errors(supplied, all_): list(utils.infix_dims(supplied, all_)) +@pytest.mark.parametrize( + ["dim", "expected"], + [ + pytest.param("a", ("a",), id="str"), + pytest.param(["a", "b"], ("a", "b"), id="list_of_str"), + pytest.param(["a", 1], ("a", 1), id="list_mixed"), + pytest.param(("a", "b"), ("a", "b"), id="tuple_of_str"), + pytest.param(["a", ("b", "c")], ("a", ("b", "c")), id="list_with_tuple"), + pytest.param((("b", "c"),), (("b", "c"),), id="tuple_of_tuple"), + pytest.param(None, None, id="None"), + pytest.param(..., ..., id="ellipsis"), + ], +) +def test_parse_dims( + dim: str | Iterable[Hashable] | None, + expected: tuple[Hashable, ...], +) -> None: + all_dims = ("a", "b", 1, ("b", "c")) # selection of different Hashables + actual = utils.parse_dims(dim, all_dims, replace_none=False) + assert actual == expected + + +def test_parse_dims_set() -> None: + all_dims = ("a", "b", 1, ("b", "c")) # selection of different Hashables + dim = {"a", 1} + actual = utils.parse_dims(dim, all_dims) + assert set(actual) == dim + + +@pytest.mark.parametrize( + "dim", [pytest.param(None, id="None"), pytest.param(..., id="ellipsis")] +) +def test_parse_dims_replace_none(dim: None | ellipsis) -> None: + all_dims = ("a", "b", 1, ("b", "c")) # selection of different Hashables + actual = utils.parse_dims(dim, all_dims, replace_none=True) + assert actual == all_dims + + +@pytest.mark.parametrize( + "dim", + [ + pytest.param("x", id="str_missing"), + pytest.param(["a", "x"], id="list_missing_one"), + pytest.param(["x", 2], id="list_missing_all"), + ], +) +def test_parse_dims_raises(dim: str | Iterable[Hashable]) -> None: + all_dims = ("a", "b", 1, ("b", "c")) # selection 
of different Hashables + with pytest.raises(ValueError, match="'x'"): + utils.parse_dims(dim, all_dims, check_exists=True) + + +@pytest.mark.parametrize( + ["dim", "expected"], + [ + pytest.param("a", ("a",), id="str"), + pytest.param(["a", "b"], ("a", "b"), id="list"), + pytest.param([...], ("a", "b", "c"), id="list_only_ellipsis"), + pytest.param(["a", ...], ("a", "b", "c"), id="list_with_ellipsis"), + pytest.param(["a", ..., "b"], ("a", "c", "b"), id="list_with_middle_ellipsis"), + ], +) +def test_parse_ordered_dims( + dim: str | Sequence[Hashable | ellipsis], + expected: tuple[Hashable, ...], +) -> None: + all_dims = ("a", "b", "c") + actual = utils.parse_ordered_dims(dim, all_dims) + assert actual == expected + + +def test_parse_ordered_dims_raises() -> None: + all_dims = ("a", "b", "c") + + with pytest.raises(ValueError, match="'x' do not exist"): + utils.parse_ordered_dims("x", all_dims, check_exists=True) + + with pytest.raises(ValueError, match="repeated dims"): + utils.parse_ordered_dims(["a", ...], all_dims + ("a",)) + + with pytest.raises(ValueError, match="More than one ellipsis"): + utils.parse_ordered_dims(["a", ..., "b", ...], all_dims) + + @pytest.mark.parametrize( "nested_list, expected", [ @@ -266,3 +350,13 @@ def test_infix_dims_errors(supplied, all_): ) def test_iterate_nested(nested_list, expected): assert list(iterate_nested(nested_list)) == expected + + +def test_find_stack_level(): + assert utils.find_stack_level() == 1 + assert utils.find_stack_level(test_mode=True) == 2 + + def f(): + return utils.find_stack_level(test_mode=True) + + assert f() == 3 diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 983c584f69d..3a6c8f1b966 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1769,9 +1769,9 @@ def test_quantile_method(self, method, use_dask) -> None: actual = v.quantile(q, dim="y", method=method) if Version(np.__version__) >= Version("1.22"): - expected = np.nanquantile(self.d, q, axis=1, method=method) # type: ignore[call-arg] + expected = np.nanquantile(self.d, q, axis=1, method=method) else: - expected = np.nanquantile(self.d, q, axis=1, interpolation=method) # type: ignore[call-arg] + expected = np.nanquantile(self.d, q, axis=1, interpolation=method) if use_dask: assert isinstance(actual.data, dask_array_type) diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py index 58cd5b20b9c..b2a61ed5eb2 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -60,7 +60,7 @@ class {obj}{cls}Aggregations: def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -71,7 +71,7 @@ def reduce( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, **kwargs: Any, ) -> {obj}: raise NotImplementedError()""" @@ -84,7 +84,7 @@ class {obj}{cls}Aggregations: def reduce( self, func: Callable[..., Any], - dim: Dims | ellipsis = None, + dim: Dims = None, *, axis: int | Sequence[int] | None = None, keep_attrs: bool | None = None, @@ -95,7 +95,7 @@ def reduce( def _flox_reduce( self, - dim: Dims | ellipsis, + dim: Dims, **kwargs: Any, ) -> {obj}: raise NotImplementedError()""" @@ -117,7 +117,7 @@ def {method}( TEMPLATE_REDUCTION_SIGNATURE_GROUPBY = ''' def {method}( self, - dim: Dims | ellipsis = None, + dim: Dims = None, *,{extra_kwargs} keep_attrs: bool | None = None, **kwargs: Any, @@ -149,9 +149,9 @@ def {method}( ----- {notes}""" 
-_DIM_DOCSTRING = """dim : str, Iterable of Hashable, or None, default: None +_DIM_DOCSTRING = """dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``{method}``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If None, will reduce over all dimensions.""" + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions.""" _DIM_DOCSTRING_GROUPBY = """dim : str, Iterable of Hashable, "..." or None, default: None Name of dimension[s] along which to apply ``{method}``. For e.g. ``dim="x"`` diff --git a/xarray/util/generate_ops.py b/xarray/util/generate_ops.py index 7407e3d3f4f..0ca5954b056 100644 --- a/xarray/util/generate_ops.py +++ b/xarray/util/generate_ops.py @@ -110,7 +110,7 @@ def {method}(self: T_Dataset, other: DsCompatible) -> T_Dataset: ...{override}"" @overload{override} def {method}(self, other: T_Dataset) -> T_Dataset: ... @overload - def {method}(self, other: "DatasetGroupBy") -> "Dataset": ... # type: ignore[misc] + def {method}(self, other: "DatasetGroupBy") -> "Dataset": ... @overload def {method}(self: T_DataArray, other: DaCompatible) -> T_DataArray: ...""" stub_var = """\ @@ -124,7 +124,7 @@ def {method}(self: T_Variable, other: VarCompatible) -> T_Variable: ...""" @overload{override} def {method}(self, other: T_Dataset) -> T_Dataset: ... @overload - def {method}(self, other: "DataArray") -> "Dataset": ... # type: ignore[misc] + def {method}(self, other: "DataArray") -> "Dataset": ... @overload def {method}(self, other: GroupByIncompatible) -> NoReturn: ...""" stub_dagb = """\ diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py index b8689e3a18f..d53f1aab65e 100755 --- a/xarray/util/print_versions.py +++ b/xarray/util/print_versions.py @@ -129,6 +129,7 @@ def show_versions(file=sys.stdout): ("pip", lambda mod: mod.__version__), ("conda", lambda mod: mod.__version__), ("pytest", lambda mod: mod.__version__), + ("mypy", lambda mod: importlib.metadata.version(mod.__name__)), # Misc. ("IPython", lambda mod: mod.__version__), ("sphinx", lambda mod: mod.__version__),
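
Editor's note (not part of the patch): the resample-related hunks above add `base`, `offset`, and `origin` keywords that mirror `pandas.Series.resample`. The snippet below is a minimal illustrative sketch of the behaviour the new tests (`test_resample_offset`, `test_resample_origin`) exercise; it assumes an environment running this branch of xarray together with pandas and numpy, and it is adapted directly from those tests rather than being an independent reference implementation.

import numpy as np
import pandas as pd
import xarray as xr

# Hourly-ish data starting at an "awkward" timestamp, as in the tests above.
times = pd.date_range("2000-01-01T02:03:01", freq="6H", periods=10)
array = xr.DataArray(np.arange(10), [("time", times)])

# `offset` shifts the bin edges by a timedelta, matching pandas semantics.
offset_mean = array.resample(time="24H", offset=pd.Timedelta("11H")).mean()

# `origin` anchors the bins, e.g. at the first timestamp of the data.
origin_mean = array.resample(time="24H", origin="start").mean()

# Both are expected to agree with the equivalent pandas resample on the
# underlying series, which is exactly what the new tests assert.
expected = xr.DataArray(array.to_series().resample("24H", origin="start").mean())
xr.testing.assert_identical(expected, origin_mean)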