From f3ef9df75b7368ad5d28d14919e0026087475572 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Thu, 30 May 2024 23:27:41 -0700 Subject: [PATCH 01/12] add skipna parameter --- xcdat/spatial.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/xcdat/spatial.py b/xcdat/spatial.py index 15bec956..809621fd 100644 --- a/xcdat/spatial.py +++ b/xcdat/spatial.py @@ -76,6 +76,7 @@ def average( keep_weights: bool = False, lat_bounds: Optional[RegionAxisBounds] = None, lon_bounds: Optional[RegionAxisBounds] = None, + skipna=None, ) -> xr.Dataset: """ Calculates the spatial average for a rectilinear grid over an optionally @@ -196,7 +197,7 @@ def average( self._weights = weights self._validate_weights(dv, axis) - ds[dv.name] = self._averager(dv, axis) + ds[dv.name] = self._averager(dv, axis, skipna=skipna) if keep_weights: ds[self._weights.name] = self._weights @@ -702,7 +703,10 @@ def _validate_weights( ) def _averager( - self, data_var: xr.DataArray, axis: List[SpatialAxis] | Tuple[SpatialAxis, ...] + self, + data_var: xr.DataArray, + axis: List[SpatialAxis] | Tuple[SpatialAxis, ...], + skipna=None, ): """Perform a weighted average of a data variable. 
@@ -739,6 +743,6 @@ def _averager( dim.append(get_dim_keys(data_var, key)) with xr.set_options(keep_attrs=True): - weighted_mean = data_var.cf.weighted(weights).mean(dim=dim) + weighted_mean = data_var.cf.weighted(weights).mean(dim=dim, skipna=skipna) return weighted_mean From ad5e9e457c8ff46d1b297f59bd39838436dba968 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Fri, 31 May 2024 00:01:32 -0700 Subject: [PATCH 02/12] add skipna parameter --- xcdat/temporal.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/xcdat/temporal.py b/xcdat/temporal.py index 095c9cd2..c6755aa2 100644 --- a/xcdat/temporal.py +++ b/xcdat/temporal.py @@ -865,6 +865,7 @@ def _averager( keep_weights: bool = False, reference_period: Optional[Tuple[str, str]] = None, season_config: SeasonConfigInput = DEFAULT_SEASON_CONFIG, + skipna=None, ) -> xr.Dataset: """Averages a data variable based on the averaging mode and frequency.""" ds = self._dataset.copy() @@ -874,9 +875,9 @@ def _averager( ds = self._preprocess_dataset(ds) if self._mode == "average": - dv_avg = self._average(ds, data_var) + dv_avg = self._average(ds, data_var, skipna=skipna) elif self._mode in ["group_average", "climatology", "departures"]: - dv_avg = self._group_average(ds, data_var) + dv_avg = self._group_average(ds, data_var, skipna=skipna) # The original time dimension is dropped from the dataset because # it becomes obsolete after the data variable is averaged. When the @@ -1445,7 +1446,7 @@ def _drop_leap_days(self, ds: xr.Dataset): ) return ds - def _average(self, ds: xr.Dataset, data_var: str) -> xr.DataArray: + def _average(self, ds: xr.Dataset, data_var: str, skipna=None) -> xr.DataArray: """Averages a data variable with the time dimension removed. 
Parameters @@ -1467,15 +1468,17 @@ def _average(self, ds: xr.Dataset, data_var: str) -> xr.DataArray: time_bounds = ds.bounds.get_bounds("T", var_key=data_var) self._weights = self._get_weights(time_bounds) - dv = dv.weighted(self._weights).mean(dim=self.dim) + dv = dv.weighted(self._weights).mean(dim=self.dim, skipna=skipna) # type: ignore else: - dv = dv.mean(dim=self.dim) + dv = dv.mean(dim=self.dim, skipna=skipna) # type: ignore dv = self._add_operation_attrs(dv) return dv - def _group_average(self, ds: xr.Dataset, data_var: str) -> xr.DataArray: + def _group_average( + self, ds: xr.Dataset, data_var: str, skipna=None + ) -> xr.DataArray: """Averages a data variable by time group. Parameters @@ -1515,12 +1518,14 @@ def _group_average(self, ds: xr.Dataset, data_var: str) -> xr.DataArray: # WA = sum(data*weights) / sum(weights). The denominator must be # included to take into account zero weight for missing data. with xr.set_options(keep_attrs=True): - dv = self._group_data(dv).sum() / self._group_data(weights).sum() + dv = self._group_data(dv).sum(skipna=skipna) / self._group_data( + weights + ).sum(skipna=skipna) # Restore the data variable's name. dv.name = data_var else: - dv = self._group_data(dv).mean() + dv = self._group_data(dv).mean(skipna=skipna) # After grouping and aggregating, the grouped time dimension's # attributes are removed. 
Xarray's `keep_attrs=True` option only keeps From 8204188675a7c580e6ed4f976226a04e1d715f17 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Fri, 31 May 2024 01:02:32 -0700 Subject: [PATCH 03/12] add skipna where it was missed --- xcdat/temporal.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/xcdat/temporal.py b/xcdat/temporal.py index c6755aa2..15510aa5 100644 --- a/xcdat/temporal.py +++ b/xcdat/temporal.py @@ -163,7 +163,13 @@ class TemporalAccessor: def __init__(self, dataset: xr.Dataset): self._dataset: xr.Dataset = dataset - def average(self, data_var: str, weighted: bool = True, keep_weights: bool = False): + def average( + self, + data_var: str, + weighted: bool = True, + keep_weights: bool = False, + skipna=None, + ): """ Returns a Dataset with the average of a data variable and the time dimension removed. @@ -239,7 +245,12 @@ def average(self, data_var: str, weighted: bool = True, keep_weights: bool = Fal freq = _infer_freq(self._dataset[self.dim]) return self._averager( - data_var, "average", freq, weighted=weighted, keep_weights=keep_weights + data_var, + "average", + freq, + weighted=weighted, + keep_weights=keep_weights, + skipna=skipna, ) def group_average( From b65545b850b828017c3b8f80c9cc5ba0c77baee5 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Wed, 5 Jun 2024 16:30:54 -0700 Subject: [PATCH 04/12] add parameter documentation --- xcdat/spatial.py | 16 +++++++++++++--- xcdat/temporal.py | 25 +++++++++++++++++++++---- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/xcdat/spatial.py b/xcdat/spatial.py index 809621fd..96a16f4d 100644 --- a/xcdat/spatial.py +++ b/xcdat/spatial.py @@ -76,7 +76,7 @@ def average( keep_weights: bool = False, lat_bounds: Optional[RegionAxisBounds] = None, lon_bounds: Optional[RegionAxisBounds] = None, - skipna=None, + skipna: bool | None = None, ) -> xr.Dataset: """ Calculates the spatial average for a rectilinear grid over an optionally @@ -126,6 +126,11 @@ def average( 
ignored if ``weights`` are supplied. The lower bound can be larger than the upper bound (e.g., across the prime meridian, dateline), by default None. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). Returns ------- @@ -706,8 +711,8 @@ def _averager( self, data_var: xr.DataArray, axis: List[SpatialAxis] | Tuple[SpatialAxis, ...], - skipna=None, - ): + skipna: bool | None = None, + ) -> xr.DataArray: """Perform a weighted average of a data variable. This method assumes all specified keys in ``axis`` exists in the data @@ -725,6 +730,11 @@ def _averager( Data variable inside a Dataset. axis : List[SpatialAxis] | Tuple[SpatialAxis, ...] List of axis dimensions to average over. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). Returns ------- diff --git a/xcdat/temporal.py b/xcdat/temporal.py index 15510aa5..e34eddde 100644 --- a/xcdat/temporal.py +++ b/xcdat/temporal.py @@ -168,7 +168,7 @@ def average( data_var: str, weighted: bool = True, keep_weights: bool = False, - skipna=None, + skipna: bool | None = None, ): """ Returns a Dataset with the average of a data variable and the time @@ -210,6 +210,11 @@ def average( keep_weights : bool, optional If calculating averages using weights, keep the weights in the final dataset output, by default False. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). Returns ------- @@ -876,7 +881,7 @@ def _averager( keep_weights: bool = False, reference_period: Optional[Tuple[str, str]] = None, season_config: SeasonConfigInput = DEFAULT_SEASON_CONFIG, - skipna=None, + skipna: bool | None = None, ) -> xr.Dataset: """Averages a data variable based on the averaging mode and frequency.""" ds = self._dataset.copy() @@ -1457,7 +1462,9 @@ def _drop_leap_days(self, ds: xr.Dataset): ) return ds - def _average(self, ds: xr.Dataset, data_var: str, skipna=None) -> xr.DataArray: + def _average( + self, ds: xr.Dataset, data_var: str, skipna: bool | None = None + ) -> xr.DataArray: """Averages a data variable with the time dimension removed. Parameters @@ -1466,6 +1473,11 @@ def _average(self, ds: xr.Dataset, data_var: str, skipna=None) -> xr.DataArray: The dataset. data_var : str The key of the data variable. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). Returns ------- @@ -1488,7 +1500,7 @@ def _average(self, ds: xr.Dataset, data_var: str, skipna=None) -> xr.DataArray: return dv def _group_average( - self, ds: xr.Dataset, data_var: str, skipna=None + self, ds: xr.Dataset, data_var: str, skipna: bool | None = None ) -> xr.DataArray: """Averages a data variable by time group. @@ -1498,6 +1510,11 @@ def _group_average( The dataset. data_var : str The key of the data variable. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). Returns ------- From 888b29493f929a58b897d5d30805845ffa5ba8a8 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Wed, 5 Jun 2024 17:32:26 -0700 Subject: [PATCH 05/12] make sure time diff type is timedelta: convert timedelta64 to timedelta in such case --- xcdat/bounds.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/xcdat/bounds.py b/xcdat/bounds.py index 4ee799c3..e7fb7a6d 100644 --- a/xcdat/bounds.py +++ b/xcdat/bounds.py @@ -640,14 +640,10 @@ def _create_time_bounds( # noqa: C901 elif freq == "hour": # Determine the daily frequency for generating time bounds. if daily_subfreq is None: - diff = time.values[1] - time.values[0] - - # Arrays with `dtype="timedelta64[ns]"` must be converted to - # pandas timedelta objects in order to access the `.seconds` - # time component. - if isinstance(diff, np.timedelta64): - diff = pd.to_timedelta(diff) - + diff = pd.to_timedelta(time.values[1] - time.values[0]) + # `cftime` objects only support arithmetic using `timedelta` objects, so + # the values of `diff` must be casted from `dtype="timedelta64[ns]"` + # to `timedelta` objects. 
hrs = diff.seconds / 3600 daily_subfreq = int(24 / hrs) # type: ignore From 7b6da1ddfe0044721123e388d7ffa0c30ae815bb Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Wed, 5 Jun 2024 17:44:04 -0700 Subject: [PATCH 06/12] Use `Union` for skipna to allow bool or None, instead of using `|` operator that fails CI build for python 3.9 --- xcdat/spatial.py | 2 +- xcdat/temporal.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/xcdat/spatial.py b/xcdat/spatial.py index 96a16f4d..541cbe6b 100644 --- a/xcdat/spatial.py +++ b/xcdat/spatial.py @@ -76,7 +76,7 @@ def average( keep_weights: bool = False, lat_bounds: Optional[RegionAxisBounds] = None, lon_bounds: Optional[RegionAxisBounds] = None, - skipna: bool | None = None, + skipna: Union[bool, None] = None, ) -> xr.Dataset: """ Calculates the spatial average for a rectilinear grid over an optionally diff --git a/xcdat/temporal.py b/xcdat/temporal.py index e34eddde..15d8d2ce 100644 --- a/xcdat/temporal.py +++ b/xcdat/temporal.py @@ -881,7 +881,7 @@ def _averager( keep_weights: bool = False, reference_period: Optional[Tuple[str, str]] = None, season_config: SeasonConfigInput = DEFAULT_SEASON_CONFIG, - skipna: bool | None = None, + skipna: Union[bool, None] = None, ) -> xr.Dataset: """Averages a data variable based on the averaging mode and frequency.""" ds = self._dataset.copy() @@ -1463,7 +1463,7 @@ def _drop_leap_days(self, ds: xr.Dataset): return ds def _average( - self, ds: xr.Dataset, data_var: str, skipna: bool | None = None + self, ds: xr.Dataset, data_var: str, skipna: Union[bool, None] = None ) -> xr.DataArray: """Averages a data variable with the time dimension removed. 
From 8493958d60953d68ce826ad4ba7a51dd1469e88a Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Wed, 5 Jun 2024 17:49:11 -0700 Subject: [PATCH 07/12] Use `Union` for skipna to allow bool or None, instead of using `|` operator that fails CI build for python 3.9 (fixing those missed in the previous commit) --- xcdat/temporal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xcdat/temporal.py b/xcdat/temporal.py index 15d8d2ce..644cba6e 100644 --- a/xcdat/temporal.py +++ b/xcdat/temporal.py @@ -168,7 +168,7 @@ def average( data_var: str, weighted: bool = True, keep_weights: bool = False, - skipna: bool | None = None, + skipna: Union[bool, None] = None, ): """ Returns a Dataset with the average of a data variable and the time @@ -1500,7 +1500,7 @@ def _average( return dv def _group_average( - self, ds: xr.Dataset, data_var: str, skipna: bool | None = None + self, ds: xr.Dataset, data_var: str, skipna: Union[bool, None] = None ) -> xr.DataArray: """Averages a data variable by time group. From 4bdd3337ff253a16d471a93e689bc01b84c97208 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Wed, 5 Jun 2024 22:27:18 -0700 Subject: [PATCH 08/12] retract the change for timedelta conversion to make a separate PR --- xcdat/bounds.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/xcdat/bounds.py b/xcdat/bounds.py index e7fb7a6d..90984c6a 100644 --- a/xcdat/bounds.py +++ b/xcdat/bounds.py @@ -640,10 +640,7 @@ def _create_time_bounds( # noqa: C901 elif freq == "hour": # Determine the daily frequency for generating time bounds. if daily_subfreq is None: - diff = pd.to_timedelta(time.values[1] - time.values[0]) - # `cftime` objects only support arithmetic using `timedelta` objects, so - # the values of `diff` must be casted from `dtype="timedelta64[ns]"` - # to `timedelta` objects. 
+ diff = time.values[1] - time.values[0] hrs = diff.seconds / 3600 daily_subfreq = int(24 / hrs) # type: ignore From c85c309f8306814d56af4035de8f02ce5ea86e64 Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Mon, 24 Jun 2024 10:37:40 -0700 Subject: [PATCH 09/12] Add `skipna` arg to all temporal APIs - Add unit tests --- tests/test_spatial.py | 22 ++++++ tests/test_temporal.py | 169 +++++++++++++++++++++++++++++++++++++++++ xcdat/temporal.py | 41 +++++++--- 3 files changed, 220 insertions(+), 12 deletions(-) diff --git a/tests/test_spatial.py b/tests/test_spatial.py index fe0361cd..9e9ae786 100644 --- a/tests/test_spatial.py +++ b/tests/test_spatial.py @@ -180,6 +180,28 @@ def test_spatial_average_for_lat_region(self): assert result.identical(expected) + def test_spatial_average_for_lat_region_and_skipna(self): + ds = self.ds.copy(deep=True) + ds.ts[0] = np.nan + + # Specifying axis as a list of str and skipping NaN values. + result = ds.spatial.average("ts", axis=["Y"], lat_bounds=(-5.0, 5), skipna=True) + + expected = self.ds.copy() + expected["ts"] = xr.DataArray( + data=np.array( + [ + [np.nan, np.nan, np.nan, np.nan], + [1.0, 1.0, 1.0, 1.0], + [1.0, 1.0, 1.0, 1.0], + ] + ), + coords={"time": expected.time, "lon": expected.lon}, + dims=["time", "lon"], + ) + + assert result.identical(expected) + def test_spatial_average_for_domain_wrapping_p_meridian_non_cf_conventions( self, ): diff --git a/tests/test_temporal.py b/tests/test_temporal.py index 7951d3f3..7e8b0b2b 100644 --- a/tests/test_temporal.py +++ b/tests/test_temporal.py @@ -523,6 +523,57 @@ def test_weighted_annual_averages(self): assert result.ts.attrs == expected.ts.attrs assert result.time.attrs == expected.time.attrs + def test_weighted_annual_averages_and_skipna(self): + ds = self.ds.copy(deep=True) + ds.ts[0] = np.nan + + result = ds.temporal.group_average("ts", "year", skipna=True) + expected = ds.copy() + expected = expected.drop_dims("time") + expected["ts"] = xr.DataArray( + name="ts", + data=np.array([[[1]], 
[[2.0]]]), + coords={ + "lat": expected.lat, + "lon": expected.lon, + "time": xr.DataArray( + data=np.array( + [ + cftime.DatetimeGregorian(2000, 1, 1), + cftime.DatetimeGregorian(2001, 1, 1), + ], + ), + coords={ + "time": np.array( + [ + cftime.DatetimeGregorian(2000, 1, 1), + cftime.DatetimeGregorian(2001, 1, 1), + ], + ) + }, + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ), + }, + dims=["time", "lat", "lon"], + attrs={ + "test_attr": "test", + "operation": "temporal_avg", + "mode": "group_average", + "freq": "year", + "weighted": "True", + }, + ) + + xr.testing.assert_allclose(result, expected) + assert result.ts.attrs == expected.ts.attrs + assert result.time.attrs == expected.time.attrs + @requires_dask def test_weighted_annual_averages_with_chunking(self): ds = self.ds.copy().chunk({"time": 2}) @@ -1455,6 +1506,68 @@ def test_raises_deprecation_warning_with_drop_incomplete_djf_season_config(self) xr.testing.assert_identical(result, expected) + def test_weighted_seasonal_climatology_with_DJF_and_skipna(self): + ds = self.ds.copy(deep=True) + + # Replace all MAM values with np.nan. 
+ djf_months = [3, 4, 5] + for mon in djf_months: + ds["ts"] = ds.ts.where(ds.ts.time.dt.month != mon, np.nan) + + result = ds.temporal.climatology( + "ts", + "season", + season_config={"dec_mode": "DJF", "drop_incomplete_djf": True}, + skipna=True, + ) + + expected = ds.copy() + expected = expected.drop_dims("time") + expected_time = xr.DataArray( + data=np.array( + [ + cftime.DatetimeGregorian(1, 1, 1), + cftime.DatetimeGregorian(1, 4, 1), + cftime.DatetimeGregorian(1, 7, 1), + cftime.DatetimeGregorian(1, 10, 1), + ], + ), + coords={ + "time": np.array( + [ + cftime.DatetimeGregorian(1, 1, 1), + cftime.DatetimeGregorian(1, 4, 1), + cftime.DatetimeGregorian(1, 7, 1), + cftime.DatetimeGregorian(1, 10, 1), + ], + ), + }, + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ) + expected["ts"] = xr.DataArray( + name="ts", + data=np.ones((4, 4, 4)), + coords={"lat": expected.lat, "lon": expected.lon, "time": expected_time}, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "climatology", + "freq": "season", + "weighted": "True", + "dec_mode": "DJF", + "drop_incomplete_djf": "True", + }, + ) + expected.ts[1] = np.nan + + # MAM should be np.nan + assert result.identical(expected) + @requires_dask def test_chunked_weighted_seasonal_climatology_with_DJF(self): ds = self.ds.copy().chunk({"time": 2}) @@ -2296,6 +2409,62 @@ def test_weighted_seasonal_departures_with_DJF(self): xr.testing.assert_identical(result, expected) + def test_weighted_seasonal_departures_with_DJF_and_skipna(self): + ds = self.ds.copy(deep=True) + + # Replace all MAM values with np.nan. 
+ djf_months = [3, 4, 5] + for mon in djf_months: + ds["ts"] = ds.ts.where(ds.ts.time.dt.month != mon, np.nan) + + result = ds.temporal.departures( + "ts", + "season", + weighted=True, + season_config={"dec_mode": "DJF", "drop_incomplete_djf": True}, + skipna=True, + ) + + expected = ds.copy() + expected = expected.drop_dims("time") + expected["ts"] = xr.DataArray( + name="ts", + data=np.array([[[np.nan]], [[0.0]], [[0.0]], [[0.0]]]), + coords={ + "lat": expected.lat, + "lon": expected.lon, + "time": xr.DataArray( + data=np.array( + [ + cftime.DatetimeGregorian(2000, 4, 1), + cftime.DatetimeGregorian(2000, 7, 1), + cftime.DatetimeGregorian(2000, 10, 1), + cftime.DatetimeGregorian(2001, 1, 1), + ], + ), + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ), + }, + dims=["time", "lat", "lon"], + attrs={ + "test_attr": "test", + "operation": "temporal_avg", + "mode": "departures", + "freq": "season", + "weighted": "True", + "dec_mode": "DJF", + "drop_incomplete_djf": "True", + }, + ) + + assert result.identical(expected) + def test_weighted_seasonal_departures_with_DJF_and_keep_weights(self): ds = self.ds.copy() diff --git a/xcdat/temporal.py b/xcdat/temporal.py index 644cba6e..10a5e8a5 100644 --- a/xcdat/temporal.py +++ b/xcdat/temporal.py @@ -168,7 +168,7 @@ def average( data_var: str, weighted: bool = True, keep_weights: bool = False, - skipna: Union[bool, None] = None, + skipna: bool | None = None, ): """ Returns a Dataset with the average of a data variable and the time @@ -210,7 +210,7 @@ def average( keep_weights : bool, optional If calculating averages using weights, keep the weights in the final dataset output, by default False. - skipna : bool or None, optional + skipna : bool | None, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been @@ -265,6 +265,7 @@ def group_average( weighted: bool = True, keep_weights: bool = False, season_config: SeasonConfigInput = DEFAULT_SEASON_CONFIG, + skipna: bool | None = None, ): """Returns a Dataset with average of a data variable by time group. @@ -361,6 +362,11 @@ def group_average( >>> ["Jul", "Aug", "Sep"], # "JulAugSep" >>> ["Oct", "Nov", "Dec"], # "OctNovDec" >>> ] + skipna : bool | None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). Returns ------- @@ -439,6 +445,7 @@ def group_average( weighted=weighted, keep_weights=keep_weights, season_config=season_config, + skipna=skipna, ) def climatology( @@ -449,6 +456,7 @@ def climatology( keep_weights: bool = False, reference_period: Optional[Tuple[str, str]] = None, season_config: SeasonConfigInput = DEFAULT_SEASON_CONFIG, + skipna: bool | None = None, ): """Returns a Dataset with the climatology of a data variable. @@ -554,6 +562,11 @@ def climatology( >>> ["Jul", "Aug", "Sep"], # "JulAugSep" >>> ["Oct", "Nov", "Dec"], # "OctNovDec" >>> ] + skipna : bool | None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). 
Returns ------- @@ -637,6 +650,7 @@ def climatology( keep_weights, reference_period, season_config, + skipna, ) def departures( @@ -647,6 +661,7 @@ def departures( keep_weights: bool = False, reference_period: Optional[Tuple[str, str]] = None, season_config: SeasonConfigInput = DEFAULT_SEASON_CONFIG, + skipna: bool | None = None, ) -> xr.Dataset: """ Returns a Dataset with the climatological departures (anomalies) for a @@ -763,6 +778,11 @@ def departures( >>> ["Jul", "Aug", "Sep"], # "JulAugSep" >>> ["Oct", "Nov", "Dec"], # "OctNovDec" >>> ] + skipna : bool | None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). Returns ------- @@ -843,11 +863,7 @@ def departures( inferred_freq = _infer_freq(ds[self.dim]) if inferred_freq != freq: ds_obs = ds_obs.temporal.group_average( - data_var, - freq, - weighted, - keep_weights, - season_config, + data_var, freq, weighted, keep_weights, season_config, skipna ) # 4. Calculate the climatology of the data variable. @@ -860,6 +876,7 @@ def departures( keep_weights, reference_period, season_config, + skipna, ) # 5. Calculate the departures for the data variable. @@ -881,7 +898,7 @@ def _averager( keep_weights: bool = False, reference_period: Optional[Tuple[str, str]] = None, season_config: SeasonConfigInput = DEFAULT_SEASON_CONFIG, - skipna: Union[bool, None] = None, + skipna: bool | None = None, ) -> xr.Dataset: """Averages a data variable based on the averaging mode and frequency.""" ds = self._dataset.copy() @@ -1463,7 +1480,7 @@ def _drop_leap_days(self, ds: xr.Dataset): return ds def _average( - self, ds: xr.Dataset, data_var: str, skipna: Union[bool, None] = None + self, ds: xr.Dataset, data_var: str, skipna: bool | None = None ) -> xr.DataArray: """Averages a data variable with the time dimension removed. 
@@ -1473,7 +1490,7 @@ def _average( The dataset. data_var : str The key of the data variable. - skipna : bool or None, optional + skipna : bool | None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been @@ -1500,7 +1517,7 @@ def _average( return dv def _group_average( - self, ds: xr.Dataset, data_var: str, skipna: Union[bool, None] = None + self, ds: xr.Dataset, data_var: str, skipna: bool | None = None ) -> xr.DataArray: """Averages a data variable by time group. @@ -1510,7 +1527,7 @@ def _group_average( The dataset. data_var : str The key of the data variable. - skipna : bool or None, optional + skipna : bool | None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been From 1ce0fe197ee7f55815822ee43d06e4cadec32c65 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Wed, 6 Nov 2024 15:26:31 -0800 Subject: [PATCH 10/12] Add annotations import to `temporal.py` --- xcdat/temporal.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xcdat/temporal.py b/xcdat/temporal.py index 10a5e8a5..11a5b0ba 100644 --- a/xcdat/temporal.py +++ b/xcdat/temporal.py @@ -1,6 +1,8 @@ """Module containing temporal functions.""" +from __future__ import annotations import warnings + from datetime import datetime from itertools import chain from typing import Dict, List, Literal, Optional, Tuple, TypedDict, Union, get_args @@ -1508,9 +1510,9 @@ def _average( time_bounds = ds.bounds.get_bounds("T", var_key=data_var) self._weights = self._get_weights(time_bounds) - dv = dv.weighted(self._weights).mean(dim=self.dim, skipna=skipna) # type: ignore + dv = dv.weighted(self._weights).mean(dim=self.dim, skipna=skipna) else: - dv = dv.mean(dim=self.dim, skipna=skipna) # 
type: ignore + dv = dv.mean(dim=self.dim, skipna=skipna) dv = self._add_operation_attrs(dv) From 9847dd744a238d39e09bc6a18ef82765dd4bba26 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Wed, 6 Nov 2024 15:32:51 -0800 Subject: [PATCH 11/12] Restore code in `bounds.py` that was accidentally deleted --- xcdat/bounds.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/xcdat/bounds.py b/xcdat/bounds.py index 90984c6a..4ee799c3 100644 --- a/xcdat/bounds.py +++ b/xcdat/bounds.py @@ -641,6 +641,13 @@ def _create_time_bounds( # noqa: C901 # Determine the daily frequency for generating time bounds. if daily_subfreq is None: diff = time.values[1] - time.values[0] + + # Arrays with `dtype="timedelta64[ns]"` must be converted to + # pandas timedelta objects in order to access the `.seconds` + # time component. + if isinstance(diff, np.timedelta64): + diff = pd.to_timedelta(diff) + hrs = diff.seconds / 3600 daily_subfreq = int(24 / hrs) # type: ignore From e554f6fa2eaab1de76df4e008e7e339d3535d537 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 31 Jan 2025 21:39:56 +0000 Subject: [PATCH 12/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xcdat/temporal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xcdat/temporal.py b/xcdat/temporal.py index 11a5b0ba..19a43ea1 100644 --- a/xcdat/temporal.py +++ b/xcdat/temporal.py @@ -1,8 +1,8 @@ """Module containing temporal functions.""" from __future__ import annotations -import warnings +import warnings from datetime import datetime from itertools import chain from typing import Dict, List, Literal, Optional, Tuple, TypedDict, Union, get_args