ADD: improved test coverage
nocollier authored May 11, 2024
2 parents 63fdeea + 566cc7e commit 6069f55
Showing 9 changed files with 304 additions and 41 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -19,11 +19,11 @@
"sphinx.ext.autodoc",
"sphinx.ext.viewcode",
"sphinx.ext.autosummary",
"sphinx_autodoc_typehints",
"sphinx.ext.doctest",
"sphinx.ext.intersphinx",
"sphinx.ext.extlinks",
"numpydoc",
"sphinx_autosummary_accessors",
"IPython.sphinxext.ipython_directive",
"myst_nb",
"sphinx_copybutton",
1 change: 1 addition & 0 deletions ilamb3/__init__.py
@@ -9,6 +9,7 @@
from ilamb3._version import __version__ # noqa

# additional units that pint/cf-xarray does not handle
units.define("kg = 1e3 * g")
units.define("Mg = 1e6 * g")

__all__ = ["dataset", "compare", "analysis", "regions"]
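The `__init__.py` change registers an extra mass unit on ilamb3's shared `units` registry. As a minimal sketch of how such a pint definition behaves, here is the same `define` call on a standalone `pint.UnitRegistry` (the standalone registry is the only assumption; the definition string comes from the diff):

```python
import pint

# A standalone registry standing in for ilamb3's shared `units` object.
ureg = pint.UnitRegistry()
ureg.define("Mg = 1e6 * g")  # megagram (tonne), as added above

mass = 3.2 * ureg.Mg
print(mass.to("kg"))  # -> 3200.0 kilogram
```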
12 changes: 6 additions & 6 deletions ilamb3/analysis/bias.py
@@ -133,16 +133,16 @@ def __call__(
raise ValueError(msg)

# Build output datasets
ref_out = {"mean": ref_mean}
ref_out = ref_mean.rename_vars({varname: "mean"})
if use_uncertainty:
ref_out["uncert"] = uncert
ref_out = xr.Dataset(ref_out)
com_out = xr.Dataset(dict(bias=bias, bias_score=score))
com_out = bias.rename_vars({varname: "bias"})
com_out["bias_score"] = score[varname]
lat_name = dset.get_dim_name(com_mean, "lat")
lon_name = dset.get_dim_name(com_mean, "lon")
com_out["mean"] = com_mean.rename(
com_out["mean"] = com_mean.rename_dims(
{lat_name: f"{lat_name}_", lon_name: f"{lon_name}_"}
)
)[varname]

# Compute scalars over all regions
dfs = []
@@ -182,7 +182,7 @@ def __call__(
"bias_score",
region=region,
mean=True,
weight=ref_ if mass_weighting else None,
weight=ref_[varname] if mass_weighting else None,
)
dfs.append(
[
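The bias.py edits replace hand-assembled `xr.Dataset(dict(...))` construction with xarray's renaming helpers. A toy sketch (hypothetical `tas` variable, not ilamb3 data) of the distinction the new code relies on: `rename_vars` renames data variables only, while `rename_dims` renames dimensions only, which is how the comparison mean ends up on shadow dimensions like `lat_`/`lon_` without colliding with the reference grid:

```python
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {"tas": (("lat", "lon"), np.zeros((2, 3)))},
    coords={"lat": [0.0, 1.0], "lon": [0.0, 1.0, 2.0]},
)

# Variable name changes, dimensions untouched.
renamed = ds.rename_vars({"tas": "mean"})
assert "mean" in renamed and "lat" in renamed.dims

# Dimension names change, variables untouched; 'lat'/'lon' become
# non-dimension coordinates living on the new 'lat_'/'lon_' dims.
shadowed = ds.rename_dims({"lat": "lat_", "lon": "lon_"})
assert "lat_" in shadowed.dims
```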
15 changes: 10 additions & 5 deletions ilamb3/compare.py
@@ -62,15 +62,21 @@ def _return_breaks(ds: xr.Dataset, dim_name: str):
for dim_name in [lat_name, lon_name]:
dim = iarg[dim_name]
try:
iarg = iarg.drop(dim.attrs["bounds"])
iarg = iarg.drop_vars(dim.attrs["bounds"])
except Exception:
pass
out.append(iarg)
return out


def is_spatially_aligned(dsa: xr.Dataset, dsb: xr.Dataset) -> bool:
"""Are the lats and lons of dsa and dsb close to each other?"""
"""Check that the lats and lons of dsa and dsb close to each other.
Parameters
----------
dsa, dsb
The datasets to compare.
"""
alat_name = dset.get_dim_name(dsa, "lat")
blat_name = dset.get_dim_name(dsb, "lat")
alon_name = dset.get_dim_name(dsa, "lon")
@@ -129,8 +135,7 @@ def pick_grid_aligned(


def trim_time(dsa: xr.Dataset, dsb: xr.Dataset) -> tuple[xr.Dataset, xr.Dataset]:
"""When comparing dsb to dsa, we need the maximal amount of temporal
overlap."""
"""Return the datasets trimmed to maximal temporal overlap."""
if "time" not in dsa.dims:
return dsa, dsb
if "time" not in dsb.dims:
@@ -174,7 +179,7 @@ def adjust_lon(dsa: xr.Dataset, dsb: xr.Dataset) -> tuple[xr.Dataset, xr.Dataset
def make_comparable(
ref: xr.Dataset, com: xr.Dataset, varname: str
) -> tuple[xr.Dataset, xr.Dataset]:
"""."""
"""Return the datasets in a form where they are comparable."""
# trim away time
ref, com = trim_time(ref, com)

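`trim_time`'s new docstring states the contract directly: return both datasets restricted to their overlapping time window. A sketch of that idea on toy monthly data (this mirrors the stated intent; ilamb3's actual implementation may differ in details such as bounds handling):

```python
import numpy as np
import pandas as pd
import xarray as xr

def _toy(start: str, periods: int) -> xr.Dataset:
    time = pd.date_range(start=start, periods=periods, freq="MS")
    return xr.Dataset({"da": ("time", np.arange(periods, dtype=float))},
                      coords={"time": time})

dsa, dsb = _toy("2000-01-01", 24), _toy("2001-01-01", 24)

# Trim both datasets to their shared time window.
t0 = max(dsa["time"].values.min(), dsb["time"].values.min())
tf = min(dsa["time"].values.max(), dsb["time"].values.max())
dsa, dsb = dsa.sel(time=slice(t0, tf)), dsb.sel(time=slice(t0, tf))
assert dsa["time"].size == dsb["time"].size == 12  # the 2001 overlap
```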
73 changes: 53 additions & 20 deletions ilamb3/dataset.py
@@ -1,4 +1,4 @@
"""Dataset functions for ILAMB"""
"""Functions which operate on datasets/dataarrays."""

from typing import Any, Literal, Union

@@ -20,8 +20,8 @@ def get_dim_name(
dim
The dimension to find in the dataset/dataarray
Note
----
Notes
-----
This function is meant to handle the problem that not all data calls the dimensions
the same things ('lat', 'Lat', 'latitude', etc). We could replace this with
cf-xarray functionality. My concern is that we want this to work even if the
@@ -69,8 +69,8 @@ def get_time_extent(
def compute_time_measures(dset: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray:
"""Return the length of each time interval.
Note
----
Notes
-----
In order to integrate in time, we need the time measures. While this function is
written for greatest flexibility, the most accurate time measures will be computed
when a dataset is passed in where the 'bounds' on the 'time' dimension are labeled
@@ -106,8 +106,8 @@ def _measure1d(time):
def compute_cell_measures(dset: Union[xr.Dataset, xr.DataArray]) -> xr.DataArray:
"""Return the area of each cell.
Note
----
Notes
-----
It would be better to get these from the model data itself, but they are not always
provided, particularly in reference data.
@@ -182,7 +182,6 @@ def coarsen_dataset(dset: xr.Dataset, res: float = 0.5) -> xr.Dataset:
The input dataset.
res
The target resolution in degrees.
"""
lat_name = get_dim_name(dset, "lat")
lon_name = get_dim_name(dset, "lon")
@@ -204,7 +203,7 @@ def coarsen_dataset(dset: xr.Dataset, res: float = 0.5) -> xr.Dataset:
.astype(int)
)
dset_coarse = (
(dset.drop("cell_measures") * dset["cell_measures"])
(dset.drop_vars("cell_measures") * dset["cell_measures"])
.coarsen({"lat": fine_per_coarse, "lon": fine_per_coarse}, boundary="pad")
.sum() # type: ignore
)
@@ -237,8 +236,8 @@ def integrate_time(
integral
The integral or mean.
Note
----
Notes
-----
This interface is useful in our analysis as many times we want to report the total
of a quantity (total mass of carbon) and other times we want the mean value (e.g.
temperature). This allows the analysis code to read the same where a flag can be
@@ -306,7 +305,7 @@ def integrate_space(
region: Union[None, str] = None,
mean: bool = False,
weight: Union[xr.DataArray, None] = None,
):
) -> xr.DataArray:
"""Return the space integral or mean of the dataset.
Parameters
@@ -329,8 +328,8 @@ def integrate_space(
integral
The integral or mean.
Note
----
Notes
-----
This interface is useful in our analysis as many times we want to report the total
of a quantity (total mass of carbon) and other times we want the mean value (e.g.
temperature). This allows the analysis code to read the same where a flag can be
@@ -359,7 +358,8 @@ def integrate_space(
var = var.pint.dequantify()
msr = msr.pint.dequantify()
if weight is not None:
msr *= weight
assert isinstance(weight, xr.DataArray)
msr = msr * weight.pint.dequantify()
out = var.weighted(msr)
if mean:
out = out.mean(dim=space)
@@ -370,11 +370,22 @@
return out.pint.quantify()


def sel(dset: xr.Dataset, coord: str, cmin: Any, cmax: Any):
def sel(dset: xr.Dataset, coord: str, cmin: Any, cmax: Any) -> xr.Dataset:
"""Return a selection of the dataset.
Note
----
Parameters
----------
dset
The input dataset.
coord
The coordinate to slice.
cmin
The minimum coordinate value.
cmax
The maximum coordinate value.
Notes
-----
The behavior of xarray.sel does not work for us here. We want to pick a slice of the
dataset but where the value lies in between the coord bounds. Then we clip the min
and max to be the limits of the slice.
@@ -425,7 +436,18 @@ def integrate_depth(
varname: Union[str, None] = None,
mean: bool = False,
) -> xr.DataArray:
"""Return the depth integral or mean of the dataset."""
"""Return the depth integral or mean of the dataset.
Parameters
----------
dset
The dataset or dataarray to integrate.
varname
If dset is a dataset, the variable name to integrate.
mean
Enable to take a depth mean.
"""
if isinstance(dset, xr.DataArray):
varname = dset.name
dset = dset.to_dataset(name=varname)
@@ -454,7 +476,18 @@ def convert(
unit: str,
varname: Union[str, None] = None,
) -> Union[xr.Dataset, xr.DataArray]:
"""Convert the units of the dataarray."""
"""Convert the units of the dataarray.
Parameters
----------
dset
The dataset (specify varname) or dataarray whose units you wish to convert.
unit
The unit to which we will convert.
varname
If dset is a dataset, give the variable name to convert.
"""
dset = dset.pint.quantify()
if isinstance(dset, xr.DataArray):
return dset.pint.to(unit)
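Several of the dataset.py edits concern `integrate_space`: the optional `weight` is now dequantified and multiplied into the cell measures (`msr = msr * weight...`) before xarray's weighted reduction. A sketch of an area-weighted spatial mean with `DataArray.weighted`; the cos(latitude) measure here is the usual uniform-grid approximation, whereas ilamb3 computes true cell measures from bounds:

```python
import numpy as np
import xarray as xr

lat = np.linspace(-85, 85, 18)
lon = np.linspace(-175, 175, 36)
da = xr.DataArray(np.random.rand(lat.size, lon.size),
                  coords={"lat": lat, "lon": lon}, dims=("lat", "lon"))

# Approximate cell measures on a uniform grid: proportional to cos(lat).
msr = np.cos(np.deg2rad(da["lat"])).broadcast_like(da)

# An optional extra weight (e.g. mass weighting) simply multiplies in,
# as in the `msr = msr * weight` line of the diff.
weight = xr.ones_like(da)
mean = da.weighted(msr * weight).mean(dim=("lat", "lon"))
print(float(mean))
```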
85 changes: 85 additions & 0 deletions ilamb3/tests/test_bias.py
@@ -0,0 +1,85 @@
import numpy as np
import pandas as pd
import pytest

from ilamb3.analysis import bias_analysis
from ilamb3.regions import Regions
from ilamb3.tests.test_compare import generate_test_dset


def gen_quantile_dbase():
df = []
for r in Regions().regions:
for th in [70, 80]:
df.append(
{
"variable": "da",
"region": r,
"quantile": th,
"type": "bias",
"value": np.random.rand(1)[0] * 1e-10,
"unit": "kg m-2 s-1",
}
)
return pd.DataFrame(df)


@pytest.mark.parametrize(
"use_uncertainty,mass_weighting,score",
[
(True, False, 0.5037343625713414),
(False, False, 0.49809129117395395),
(True, True, 0.6211524133325482),
(False, True, 0.6162697692652096),
],
)
def test_bias_collier2018(use_uncertainty: bool, mass_weighting: bool, score: float):
grid = dict(nlat=10, nlon=20)
ref = generate_test_dset(**grid)
ref["da_bnds"] = generate_test_dset(seed=2, **grid)["da"] * 1e-2
ref["da"].attrs["bounds"] = "da_bnds"
com = generate_test_dset(seed=3, **grid)
analysis = bias_analysis("da")
df, _, _ = analysis(
ref,
com,
method="Collier2018",
use_uncertainty=use_uncertainty,
mass_weighting=mass_weighting,
)
df = df[df["type"] == "score"]
print(df.iloc[0].value)
assert len(df) == 1
assert np.allclose(df.iloc[0].value, score)


@pytest.mark.skip("incomplete")
@pytest.mark.parametrize(
"use_uncertainty,quantile_threshold,score",
[
(True, 80, 0.5037343625713414),
(False, 80, 0.49809129117395395),
(False, 70, 0.49809129117395395),
],
)
def test_bias_regionalquantiles(
use_uncertainty: bool, quantile_threshold: int, score: float
):
grid = dict(nlat=10, nlon=20)
ref = generate_test_dset(**grid)
ref["da_bnds"] = generate_test_dset(seed=2, **grid)["da"] * 1e-2
ref["da"].attrs["bounds"] = "da_bnds"
com = generate_test_dset(seed=3, **grid)
analysis = bias_analysis("da")
df, _, _ = analysis(
ref,
com,
method="RegionalQuantiles",
use_uncertainty=use_uncertainty,
quantile_dbase=gen_quantile_dbase(),
quantile_threshold=quantile_threshold,
)
df = df[df["type"] == "score"]
print(df.iloc[0].value)
assert len(df) == 1
assert np.allclose(df.iloc[0].value, score)
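The fixture above attaches the uncertainty field to the reference variable through a CF-style `bounds` attribute (`ref["da"].attrs["bounds"] = "da_bnds"`). A minimal sketch of that linkage; how `bias_analysis` actually resolves the attribute internally is an assumption here:

```python
import numpy as np
import xarray as xr

ds = xr.Dataset({
    "da": ("x", np.array([1.0, 2.0])),
    "da_bnds": ("x", np.array([0.1, 0.2])),  # uncertainty companion
})
ds["da"].attrs["bounds"] = "da_bnds"

# How an analysis could recover the uncertainty from the attribute:
uncert = ds[ds["da"].attrs["bounds"]]
print(uncert.values)  # [0.1 0.2]
```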
48 changes: 48 additions & 0 deletions ilamb3/tests/test_compare.py
@@ -0,0 +1,48 @@
import numpy as np
import pandas as pd
import xarray as xr

import ilamb3.compare as cmp


def generate_test_dset(seed: int = 1, ntime=None, nlat=None, nlon=None):
rs = np.random.RandomState(seed)
coords = []
dims = []
if ntime is not None:
time = pd.date_range(start="2000-01-15", periods=ntime, freq="30D")
coords.append(time)
dims.append("time")
if nlat is not None:
lat = np.linspace(-90, 90, nlat + 1)
lat = 0.5 * (lat[1:] + lat[:-1])
coords.append(lat)
dims.append("lat")
if nlon is not None:
lon = np.linspace(-180, 180, nlon + 1)
lon = 0.5 * (lon[1:] + lon[:-1])
coords.append(lon)
dims.append("lon")
ds = xr.Dataset(
data_vars={
"da": xr.DataArray(
rs.rand(*[len(c) for c in coords]) * 1e-8, coords=coords, dims=dims
),
}
)
ds["da"].attrs["units"] = "kg m-2 s-1"
return ds


def test_nest_spatial_grids():
ds1 = generate_test_dset(nlat=2, nlon=3)
ds2 = generate_test_dset(nlat=5, nlon=7)
ds1 = ds1.cf.add_bounds("lat")
ds1_, ds2_ = cmp.nest_spatial_grids(ds1, ds2)
assert cmp.is_spatially_aligned(ds1_, ds2_)
assert np.allclose(
(ds1_ - ds2_).pint.dequantify().sum()["da"].values, -6.06395917e-08
)
dsa, dsb = cmp.pick_grid_aligned(ds1, ds2, ds1_, ds2_)
xr.testing.assert_allclose(dsa, ds1_)
xr.testing.assert_allclose(dsb, ds2_)
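`generate_test_dset` builds cell-centered coordinates by averaging n + 1 equally spaced edges down to n midpoints. The construction in isolation:

```python
import numpy as np

edges = np.linspace(-90, 90, 5)           # 5 edges -> 4 cells
centers = 0.5 * (edges[1:] + edges[:-1])  # midpoints of adjacent edges
print(centers)  # [-67.5 -22.5  22.5  67.5]
```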