Skip to content

Commit

Permalink
Revised to use max_timestamp instead of the current time when computing the elapsed time for decay
Browse files Browse the repository at this point in the history
  • Loading branch information
myui committed Dec 11, 2024
1 parent 8288b31 commit d3422ee
Showing 1 changed file with 17 additions and 2 deletions.
19 changes: 17 additions & 2 deletions rtrec/utils/interactions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from collections import defaultdict
from typing import List, Optional, Any
import time, math
import logging
from datetime import datetime, timezone

from scipy.sparse import csr_matrix, csc_matrix

Expand Down Expand Up @@ -30,6 +32,7 @@ def __init__(self, min_value: int = -5, max_value: int = 10, decay_in_days: Opti
self.decay_rate = 1.0 - (math.log(2) / decay_in_days)
self.max_user_id = 0
self.max_item_id = 0
self.max_timestamp = 0.0

def get_decay_rate(self) -> Optional[float]:
"""
Expand Down Expand Up @@ -63,7 +66,7 @@ def _apply_decay(self, value: float, last_timestamp: float) -> float:
if self.decay_rate is None:
return value

elapsed_seconds = time.time() - last_timestamp
elapsed_seconds = self.max_timestamp - last_timestamp
elapsed_days = elapsed_seconds / 86400.0

return value * self.decay_rate ** elapsed_days # approximated exponential decay in time e^(-ln(2)/decay_in_days * elapsed_days)
Expand All @@ -78,6 +81,16 @@ def add_interaction(self, user_id: int, item_id: int, tstamp: float, delta: floa
delta (float): Change in interaction count (default is 1.0).
upsert (bool): Flag to update the interaction count if it already exists (default is False).
"""
# Validate the timestamp
current_unix_time = time.time()
if tstamp > current_unix_time + 180.0: # Allow for a 180-second buffer
current_rfc3339 = datetime.fromtimestamp(current_unix_time, tz=timezone.utc).isoformat() + "Z"
tstamp_rfc3339 = datetime.fromtimestamp(tstamp, tz=timezone.utc).isoformat() + "Z"
logging.warning(f"Timestamp {tstamp_rfc3339} is in the future. Current time is {current_rfc3339}")

# Update the maximum timestamp to avoid conflicts
self.max_timestamp = max(self.max_timestamp, tstamp + 1.0)

if upsert:
self.interactions[user_id][item_id] = (delta, tstamp)
else:
Expand All @@ -89,7 +102,9 @@ def add_interaction(self, user_id: int, item_id: int, tstamp: float, delta: floa

# Store the updated value together with this interaction's timestamp (tstamp)
self.interactions[user_id][item_id] = (new_value, tstamp)
# Track all unique item IDs
self.all_item_ids.add(item_id)
# Update maximum user and item IDs
self.max_user_id = max(self.max_user_id, user_id)
self.max_item_id = max(self.max_item_id, item_id)

Expand All @@ -105,7 +120,7 @@ def get_user_item_rating(self, user_id: int, item_id: int, default_rating: float
Returns:
float: The decayed interaction value for the specified user-item pair.
"""
current, last_timestamp = self.interactions[user_id].get(item_id, (default_rating, time.time()))
current, last_timestamp = self.interactions[user_id].get(item_id, (default_rating, 0.0))
if current == default_rating:
return default_rating # Return default if no interaction exists
return self._apply_decay(current, last_timestamp)
Expand Down

0 comments on commit d3422ee

Please sign in to comment.