Skip to content

Commit

Permalink
Merge pull request #27 from ScottWales/groupbystr
Browse files Browse the repository at this point in the history
Allow pandas periods for blocked_resample
  • Loading branch information
Scott Wales authored Jan 15, 2021
2 parents a7601f2 + d76c2b4 commit 3e3b7fa
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 4 deletions.
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ source =
[tool:pytest]
addopts = --doctest-modules --doctest-glob="*.rst"
doctest_optionflags=ELLIPSIS
norecursedirs = benchmark notebooks
norecursedirs = benchmarks notebooks .asv

[mypy]
files = src/climtas,test
Expand Down
36 changes: 33 additions & 3 deletions src/climtas/blocked.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,10 @@ def resample_op(block, op, axis, count):
v = v[:: self.count]
result.coords[k] = v

# Set after we create 'result' - if the original name is None it will
# be replaced by the dask name, so results won't be identical to xarray
result.name = self.da.name

return result

def mean(self) -> xarray.DataArray:
Expand Down Expand Up @@ -170,21 +174,47 @@ def blocked_resample(da: xarray.DataArray, indexer=None, **kwargs) -> BlockedRes
>>> time = pandas.date_range('20010101','20010110', freq='H', closed='left')
>>> hourly = xarray.DataArray(numpy.random.random(time.size), coords=[('time', time)])
>>> blocked_daily_max = blocked_resample(hourly, time='1D').max()
>>> xarray_daily_max = hourly.resample(time='1D').max()
>>> xarray.testing.assert_identical(blocked_daily_max, xarray_daily_max)
>>> blocked_daily_max = blocked_resample(hourly, time=24).max()
>>> xarray_daily_max = hourly.resample(time='1D').max()
>>> xarray.testing.assert_equal(blocked_daily_max, xarray_daily_max)
>>> xarray.testing.assert_identical(blocked_daily_max, xarray_daily_max)
Args:
da (:class:`xarray.DataArray`): Resample target
indexer/kwargs (Dict[dim, count]): Mapping of dimension name to count along that axis
indexer/kwargs (Dict[dim, count]): Mapping of dimension name to count
along that axis. May be an integer or a time interval understood by
pandas (that interval must evenly divide the dataset).
Returns:
:class:`BlockedResampler`
"""
if indexer is None:
indexer = kwargs
assert len(indexer) == 1
else:
indexer = {**indexer, **kwargs}

if len(indexer) != 1:
raise Exception(
f"Only one dimension can be resampled at a time, received {indexer}"
)

dim, count = list(indexer.items())[0]

if not isinstance(count, int):
# Something like a pandas period, resample the time axis to get the count
counts = da[dim].resample({dim: count}).count()
if counts.min() != counts.max():
raise Exception(
f"Period '{count}' does not evenly divide dimension '{dim}'"
)
count = counts.values[0]

if da.sizes[dim] % count != 0:
raise Exception(f"Period '{count}' does not evenly divide dimension '{dim}'")

return BlockedResampler(da, dim=dim, count=count)


Expand Down
30 changes: 30 additions & 0 deletions test/test_blocked.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,22 @@ def sample(request):
return samples[request.param]


@pytest.fixture(params=["daily", "daily_dask"])
def sample_hr(request):
    """Hourly random series from 2002-01-01 up to (not including) 2005-01-01.

    The fixture is parametrised, so a test using it runs once with a
    numpy-backed array ("daily") and once with a dask-backed array
    ("daily_dask"). Note the parameter ids say "daily" although the data
    is sampled hourly.
    """
    hours = pandas.date_range("20020101", "20050101", freq="H", closed="left")

    # Both variants are always constructed (numpy first, then dask); the
    # requested one is picked out afterwards.
    in_memory = xarray.DataArray(
        numpy.random.random(hours.size), coords=[("time", hours)]
    )
    lazy = xarray.DataArray(
        dask.array.random.random(hours.size), coords=[("time", hours)]
    )

    by_param = {"daily": in_memory, "daily_dask": lazy}
    return by_param[request.param]


def test_groupby_dayofyear(sample):
time = pandas.date_range("20020101", "20050101", freq="D", closed="left")
daily = xarray.DataArray(numpy.random.random(time.size), coords=[("time", time)])
Expand Down Expand Up @@ -171,6 +187,20 @@ def test_resample_safety(sample):
blocked_resample(sliced, time=24)


def test_resample(sample_hr):
    """Compare blocked_resample daily means with xarray's own resample.

    The same daily mean is requested three ways - an integer count of 24
    samples, a "D" period passed as a keyword, and a "D" period passed as
    an indexer mapping - and each result is checked against
    ``sample_hr.resample(time="D").mean()``.
    """
    reference = sample_hr.resample(time="D").mean()

    # Integer block size
    by_count = blocked_resample(sample_hr, time=24).mean()
    xarray.testing.assert_equal(reference, by_count)
    xarray.testing.assert_identical(reference, by_count)

    # Pandas-style period given as a keyword argument
    by_period = blocked_resample(sample_hr, time="D").mean()
    xarray.testing.assert_identical(reference, by_period)

    # Pandas-style period given as a positional indexer mapping
    by_mapping = blocked_resample(sample_hr, {"time": "D"}).mean()
    xarray.testing.assert_identical(reference, by_mapping)


def test_groupby_safety(sample):
# Not a coordinate
sliced = sample
Expand Down

0 comments on commit 3e3b7fa

Please sign in to comment.