Skip to content

Commit

Permalink
fix(data): improve management of timeagg and timestep in dataset
Browse files: browse the repository at this point in the history
  • Loading branch information
ltrotter committed Feb 10, 2025
1 parent f942b06 commit 24f9bb7
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 12 deletions.
24 changes: 14 additions & 10 deletions src/d3tools/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,12 @@ def __init__(self, **kwargs):
if 'time_signature' in kwargs:
self.time_signature = kwargs.pop('time_signature')

if 'aggregation' in kwargs:
self.agg = TimeWindow.from_str(kwargs.pop('aggregation'))

if 'timestep' in kwargs:
self.timestep = TimeStep.from_unit(kwargs.pop('timestep'))
if 'aggregation' in kwargs:
self.agg = TimeWindow.from_str(kwargs.pop('agg'))
if hasattr(self, 'agg'):
self.timestep = self.timestep.with_agg(self.agg)

if 'notification' in kwargs:
Expand Down Expand Up @@ -114,8 +116,10 @@ def update(self, in_place = False, **kwargs):
new_dataset._tile_names = self._tile_names

new_dataset.time_signature = self.time_signature
if hasattr(self, 'timestep'):
if hasattr(self, 'timestep') and self.timestep is not None:
new_dataset.timestep = self.timestep
if hasattr(self, 'agg'):
new_dataset.agg = self.agg

new_tags = self.tags.copy()
new_tags.update(kwargs)
Expand Down Expand Up @@ -303,7 +307,7 @@ def get_last_date(self, now = None, n = 1, **kwargs) -> dt.datetime|list[dt.date
while len(last_date) < n:
this_month_times = self.get_times(this_month, **kwargs)
if len(this_month_times) > 0:
valid_time = [t for t in this_month_times if t <= now]
valid_time = [t for t in this_month_times if t < now]
valid_time.sort(reverse = True)
last_date.extend(valid_time)
elif this_month.start.year < 1900:
Expand All @@ -324,7 +328,7 @@ def get_last_ts(self, **kwargs) -> TimeStep:
if last_date is None:
return None

if hasattr(self, 'timestep'):
if hasattr(self, 'timestep') and self.timestep is not None:
timestep = self.timestep
else:
other_dates = self.get_last_date(now = last_date, n = 8, **kwargs)
Expand All @@ -338,13 +342,13 @@ def get_last_ts(self, **kwargs) -> TimeStep:
return timestep.from_date(last_date)

def estimate_timestep(self, date_sample = None, **kwargs) -> TimeStep:
if hasattr(self, 'timestep'):
if hasattr(self, 'timestep') and self.timestep is not None:
return self.timestep

if date_sample is None:
if date_sample is None or len(date_sample) == 0:
date_sample = self.get_last_date(n = 8, **kwargs)
elif len(date_sample) < 5:
other_dates = self.get_last_date(n = 8 - len(date_sample), now = min(date_sample), **kwargs)
other_dates = self.get_last_date(n = 8 - len(date_sample), now = min(date_sample), **kwargs) or []
date_sample = other_dates + date_sample

timestep = estimate_timestep(date_sample)
Expand Down Expand Up @@ -405,7 +409,7 @@ def get_first_ts(self, **kwargs) -> TimeStep:
if first_date is None:
return None

if hasattr(self, 'timestep'):
if hasattr(self, 'timestep') and self.timestep is not None:
timestep = self.timestep
else:
other_dates = self.get_first_date(start = first_date, n = 8, **kwargs)
Expand Down Expand Up @@ -457,7 +461,7 @@ def get_time_signature(self, timestep: Optional[TimeStep | dt.datetime]) -> dt.d
time = timestep
# calculating the length in this way is not perfect,
# but should work given that timesteps are always requested in order
if hasattr(self, 'timestep'):
if hasattr(self, 'timestep') and self.timestep is not None:
length = self.timestep.from_date(time).get_length()
elif hasattr(self, 'previous_requested_time'):
length = (time - self.previous_requested_time).days
Expand Down
10 changes: 8 additions & 2 deletions src/d3tools/timestepping/timestep.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,9 @@ def estimate_timestep(sample) -> TimeStep:

def mode(arr: list): return max(set(arr), key = arr.count)

if len(sample) < 2:
return None

sample.sort()
all_diff = [(sample[i+1] - sample[i]).days for i in range(len(sample)-1)]
step_length = mode(all_diff)
Expand All @@ -140,8 +143,11 @@ def mode(arr: list): return max(set(arr), key = arr.count)
if np.isclose(step_length, 1):
return Day
elif np.isclose(step_length, 8):
return ViirsModisTimeStep
elif np.isclose(step_length, 10):
if sample[-1].month == 2 and sample[-1].day == 28:
return Dekad
else:
return ViirsModisTimeStep
elif 9 <= step_length <= 11:
return Dekad
elif 30 <= step_length <= 31:
return Month
Expand Down

0 comments on commit 24f9bb7

Please sign in to comment.