Skip to content

Commit

Permalink
Better default behavior of the Coordinates constructor (#8107)
Browse files Browse the repository at this point in the history
* ``Coordinates.__init__`` create default indexes

... for any input dimension coordinate, if ``indexes=None``.

Also, if another ``Coordinates`` object is passed, extract its indexes
and raise if ``indexes`` is not None (no align/merge supported here).

* add docstring examples

* fix doctests

* fix tests

* update what's new
  • Loading branch information
benbovy authored Aug 31, 2023
1 parent afda88e commit 0f9f790
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 43 deletions.
6 changes: 6 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ New Features
Breaking changes
~~~~~~~~~~~~~~~~

- The :py:class:`Coordinates` constructor now creates a (pandas) index by
default for each dimension coordinate. To keep the previous behavior (no index
created), pass an empty dictionary to ``indexes``. The constructor now also
extracts and add the indexes from another :py:class:`Coordinates` object
passed via ``coords`` (:pull:`8107`).
By `Benoît Bovy <https://github.com/benbovy>`_.

Deprecations
~~~~~~~~~~~~
Expand Down
103 changes: 87 additions & 16 deletions xarray/core/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from xarray.core.indexes import (
Index,
Indexes,
PandasIndex,
PandasMultiIndex,
assert_no_index_corrupted,
create_default_index_implicit,
Expand Down Expand Up @@ -192,22 +193,69 @@ class Coordinates(AbstractCoordinates):
Coordinates are either:
- returned via the :py:attr:`Dataset.coords` and :py:attr:`DataArray.coords`
properties.
- built from index objects (e.g., :py:meth:`Coordinates.from_pandas_multiindex`).
- built directly from coordinate data and index objects (beware that no consistency
check is done on those inputs).
In the latter case, no default (pandas) index is created.
properties
- built from Pandas or other index objects
(e.g., :py:meth:`Coordinates.from_pandas_multiindex`)
- built directly from coordinate data and Xarray ``Index`` objects (beware that
no consistency check is done on those inputs)
Parameters
----------
coords: dict-like
Mapping where keys are coordinate names and values are objects that
can be converted into a :py:class:`~xarray.Variable` object
(see :py:func:`~xarray.as_variable`).
indexes: dict-like
Mapping of where keys are coordinate names and values are
:py:class:`~xarray.indexes.Index` objects.
coords: dict-like, optional
Mapping where keys are coordinate names and values are objects that
can be converted into a :py:class:`~xarray.Variable` object
(see :py:func:`~xarray.as_variable`). If another
:py:class:`~xarray.Coordinates` object is passed, its indexes
will be added to the new created object.
indexes: dict-like, optional
Mapping of where keys are coordinate names and values are
:py:class:`~xarray.indexes.Index` objects. If None (default),
pandas indexes will be created for each dimension coordinate.
Passing an empty dictionary will skip this default behavior.
Examples
--------
Create a dimension coordinate with a default (pandas) index:
>>> xr.Coordinates({"x": [1, 2]})
Coordinates:
* x (x) int64 1 2
Create a dimension coordinate with no index:
>>> xr.Coordinates(coords={"x": [1, 2]}, indexes={})
Coordinates:
x (x) int64 1 2
Create a new Coordinates object from existing dataset coordinates
(indexes are passed):
>>> ds = xr.Dataset(coords={"x": [1, 2]})
>>> xr.Coordinates(ds.coords)
Coordinates:
* x (x) int64 1 2
Create indexed coordinates from a ``pandas.MultiIndex`` object:
>>> midx = pd.MultiIndex.from_product([["a", "b"], [0, 1]])
>>> xr.Coordinates.from_pandas_multiindex(midx, "x")
Coordinates:
* x (x) object MultiIndex
* x_level_0 (x) object 'a' 'a' 'b' 'b'
* x_level_1 (x) int64 0 1 0 1
Create a new Dataset object by passing a Coordinates object:
>>> midx_coords = xr.Coordinates.from_pandas_multiindex(midx, "x")
>>> xr.Dataset(coords=midx_coords)
<xarray.Dataset>
Dimensions: (x: 4)
Coordinates:
* x (x) object MultiIndex
* x_level_0 (x) object 'a' 'a' 'b' 'b'
* x_level_1 (x) int64 0 1 0 1
Data variables:
*empty*
"""

Expand All @@ -227,17 +275,40 @@ def __init__(
from xarray.core.dataset import Dataset

if coords is None:
variables = {}
elif isinstance(coords, Coordinates):
coords = {}

variables: dict[Hashable, Variable]
default_indexes: dict[Hashable, PandasIndex] = {}
coords_obj_indexes: dict[Hashable, Index] = {}

if isinstance(coords, Coordinates):
if indexes is not None:
raise ValueError(
"passing both a ``Coordinates`` object and a mapping of indexes "
"to ``Coordinates.__init__`` is not allowed "
"(this constructor does not support merging them)"
)
variables = {k: v.copy() for k, v in coords.variables.items()}
coords_obj_indexes = dict(coords.xindexes)
else:
variables = {k: as_variable(v) for k, v in coords.items()}
variables = {}
for name, data in coords.items():
var = as_variable(data, name=name)
if var.dims == (name,) and indexes is None:
index, index_vars = create_default_index_implicit(var, list(coords))
default_indexes.update({k: index for k in index_vars})
variables.update(index_vars)
else:
variables[name] = var

if indexes is None:
indexes = {}
else:
indexes = dict(indexes)

indexes.update(default_indexes)
indexes.update(coords_obj_indexes)

no_coord_index = set(indexes) - set(variables)
if no_coord_index:
raise ValueError(
Expand Down
57 changes: 34 additions & 23 deletions xarray/tests/test_coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,17 @@ def test_init_noindex(self) -> None:
expected = Dataset(coords={"foo": ("x", [0, 1, 2])})
assert_identical(coords.to_dataset(), expected)

def test_init_default_index(self) -> None:
coords = Coordinates(coords={"x": [1, 2]})
expected = Dataset(coords={"x": [1, 2]})
assert_identical(coords.to_dataset(), expected)
assert "x" in coords.xindexes

def test_init_no_default_index(self) -> None:
# dimension coordinate with no default index (explicit)
coords = Coordinates(coords={"x": [1, 2]}, indexes={})
assert "x" not in coords.xindexes

def test_init_from_coords(self) -> None:
expected = Dataset(coords={"foo": ("x", [0, 1, 2])})
coords = Coordinates(coords=expected.coords)
Expand All @@ -25,10 +36,19 @@ def test_init_from_coords(self) -> None:
# test variables copied
assert coords.variables["foo"] is not expected.variables["foo"]

# default index
expected = Dataset(coords={"x": ("x", [0, 1, 2])})
coords = Coordinates(coords=expected.coords, indexes=expected.xindexes)
# test indexes are extracted
expected = Dataset(coords={"x": [0, 1, 2]})
coords = Coordinates(coords=expected.coords)
assert_identical(coords.to_dataset(), expected)
assert expected.xindexes == coords.xindexes

# coords + indexes not supported
with pytest.raises(
ValueError, match="passing both.*Coordinates.*indexes.*not allowed"
):
coords = Coordinates(
coords=expected.coords, indexes={"x": PandasIndex([0, 1, 2], "x")}
)

def test_init_empty(self) -> None:
coords = Coordinates()
Expand Down Expand Up @@ -60,37 +80,31 @@ def test_from_pandas_multiindex(self) -> None:
assert_identical(expected[name], coords.variables[name])

def test_dims(self) -> None:
_ds = Dataset(coords={"x": [0, 1, 2]})
coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes)
coords = Coordinates(coords={"x": [0, 1, 2]})
assert coords.dims == {"x": 3}

def test_sizes(self) -> None:
_ds = Dataset(coords={"x": [0, 1, 2]})
coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes)
coords = Coordinates(coords={"x": [0, 1, 2]})
assert coords.sizes == {"x": 3}

def test_dtypes(self) -> None:
_ds = Dataset(coords={"x": [0, 1, 2]})
coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes)
coords = Coordinates(coords={"x": [0, 1, 2]})
assert coords.dtypes == {"x": int}

def test_getitem(self) -> None:
_ds = Dataset(coords={"x": [0, 1, 2]})
coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes)
coords = Coordinates(coords={"x": [0, 1, 2]})
assert_identical(
coords["x"],
DataArray([0, 1, 2], coords={"x": [0, 1, 2]}, name="x"),
)

def test_delitem(self) -> None:
_ds = Dataset(coords={"x": [0, 1, 2]})
coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes)
coords = Coordinates(coords={"x": [0, 1, 2]})
del coords["x"]
assert "x" not in coords

def test_update(self) -> None:
_ds = Dataset(coords={"x": [0, 1, 2]})
coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes)
coords = Coordinates(coords={"x": [0, 1, 2]})

coords.update({"y": ("y", [4, 5, 6])})
assert "y" in coords
Expand All @@ -99,18 +113,16 @@ def test_update(self) -> None:
assert_identical(coords["y"], expected)

def test_equals(self):
_ds = Dataset(coords={"x": [0, 1, 2]})
coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes)
coords = Coordinates(coords={"x": [0, 1, 2]})

assert coords.equals(coords)
assert not coords.equals("no_a_coords")
assert not coords.equals("not_a_coords")

def test_identical(self):
_ds = Dataset(coords={"x": [0, 1, 2]})
coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes)
coords = Coordinates(coords={"x": [0, 1, 2]})

assert coords.identical(coords)
assert not coords.identical("no_a_coords")
assert not coords.identical("not_a_coords")

def test_copy(self) -> None:
no_index_coords = Coordinates({"foo": ("x", [1, 2, 3])})
Expand All @@ -129,8 +141,7 @@ def test_copy(self) -> None:
assert source_ndarray(v0.data) is not source_ndarray(v1.data)

def test_align(self) -> None:
_ds = Dataset(coords={"x": [0, 1, 2]})
coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes)
coords = Coordinates(coords={"x": [0, 1, 2]})

left = coords

Expand Down
4 changes: 2 additions & 2 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ def test_constructor_dask_coords(self) -> None:

def test_constructor_no_default_index(self) -> None:
# explicitly passing a Coordinates object skips the creation of default index
da = DataArray(range(3), coords=Coordinates({"x": ("x", [1, 2, 3])}))
da = DataArray(range(3), coords=Coordinates({"x": [1, 2, 3]}, indexes={}))
assert "x" in da.coords
assert "x" not in da.xindexes

Expand Down Expand Up @@ -1587,7 +1587,7 @@ class CustomIndex(Index):
assert isinstance(actual.xindexes["x"], CustomIndex)

def test_assign_coords_no_default_index(self) -> None:
coords = Coordinates({"y": ("y", [1, 2, 3])})
coords = Coordinates({"y": [1, 2, 3]}, indexes={})
da = DataArray([1, 2, 3], dims="y")
actual = da.assign_coords(coords)
assert_identical(actual.coords, coords, check_default_indexes=False)
Expand Down
4 changes: 2 additions & 2 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ def test_constructor_with_coords(self) -> None:

def test_constructor_no_default_index(self) -> None:
# explicitly passing a Coordinates object skips the creation of default index
ds = Dataset(coords=Coordinates({"x": ("x", [1, 2, 3])}))
ds = Dataset(coords=Coordinates({"x": [1, 2, 3]}, indexes={}))
assert "x" in ds
assert "x" not in ds.xindexes

Expand Down Expand Up @@ -4356,7 +4356,7 @@ class CustomIndex(Index):
assert isinstance(actual.xindexes["x"], CustomIndex)

def test_assign_coords_no_default_index(self) -> None:
coords = Coordinates({"y": ("y", [1, 2, 3])})
coords = Coordinates({"y": [1, 2, 3]}, indexes={})
ds = Dataset()
actual = ds.assign_coords(coords)
expected = coords.to_dataset()
Expand Down

0 comments on commit 0f9f790

Please sign in to comment.