Enable randomization (#184)
* Fix randomization to work with negative weights

* Fix tests to take into account randomization

* Remove experiment prob from settings
eu9ene authored Dec 21, 2020
1 parent f5693b7 commit 754b402
Showing 6 changed files with 29 additions and 44 deletions.
7 changes: 0 additions & 7 deletions docs/randomized_tails.md
@@ -1,16 +1,9 @@
# Randomized tail selection of addons

The `TAAR_EXPERIMENT_PROB` sets a probability that a user is in an experiment
to get randomized recommendations.

Randomized recommendations do not mean that recommendations are
fully randomized. Weights for each recommendation are normalized
so that the sum of weights equals 1.0.

Using `numpy.random.choice`, we then select a non-uniform random
sample from the list of suggestions without replacement. Weights are
used to define a vector of probabilities.


By default - TAAR_EXPERIMENT_PROB is set to 0.0 which in effect
disables the randomization feature.
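
For illustration only (not part of this commit), a minimal sketch of the selection the doc describes: weights are normalized into a probability vector and `numpy.random.choice` draws a weighted sample without replacement. The guids and weights here are made up.

```python
import numpy as np

# Hypothetical (guid, weight) suggestions; weights need not sum to 1.0.
suggestions = [("guid-a", 0.6), ("guid-b", 0.3), ("guid-c", 0.1)]

guids = [g for g, _ in suggestions]
weights = np.array([w for _, w in suggestions])

# Normalize so the weights form a probability vector summing to 1.0.
probabilities = weights / weights.sum()

# Non-uniform random sample of all guids, without replacement.
choices = np.random.choice(guids, size=len(guids), replace=False, p=probabilities)
print(list(choices))
```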
7 changes: 4 additions & 3 deletions taar/recommenders/randomizer.py
@@ -28,16 +28,17 @@ def reorder_guids(guid_weight_tuples, size=None):
if guid_weight_tuples is None or len(guid_weight_tuples) == 0:
return []

weight_list = [weight for (guid, weight) in guid_weight_tuples]
weights = np.array([weight for (guid, weight) in guid_weight_tuples])
guids = [guid for (guid, weight) in guid_weight_tuples]
guid_map = dict(zip(guids, guid_weight_tuples))

if size is None:
size = len(guids)

# Normalize the weights so that they're probabilities
total_weight = sum(weight_list)
probabilities = [w * 1.0 / total_weight for w in weight_list]
# Scale first, weights can be negative (for example, collaborative filtering similarity scores)
scaled_weights = weights - np.min(weights) + np.finfo(float).eps
probabilities = scaled_weights / np.sum(scaled_weights)

choices = np.random.choice(guids, size=size, replace=False, p=probabilities)
return [guid_map[guid] for guid in choices]
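
A quick standalone check of the shift applied above (illustrative only, with made-up weights): subtracting the minimum and adding a tiny epsilon turns negative similarity scores into a valid probability vector that `np.random.choice` will accept.

```python
import numpy as np

# Hypothetical weights, including negatives such as collaborative
# filtering similarity scores.
weights = np.array([-0.6, -0.3, 0.09, 0.3, 2.5])

# Shift by the minimum (plus eps so the smallest entry is not exactly 0),
# then normalize into probabilities.
scaled = weights - weights.min() + np.finfo(float).eps
probabilities = scaled / scaled.sum()

assert (probabilities >= 0).all()
assert np.isclose(probabilities.sum(), 1.0)
```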
32 changes: 9 additions & 23 deletions taar/recommenders/recommendation_manager.py
@@ -6,16 +6,13 @@
EnsembleRecommender,
is_test_client,
)
from taar.recommenders.randomizer import in_experiment, reorder_guids
from taar.recommenders.randomizer import reorder_guids
from taar.recommenders.debug import log_timer_info
from srgutil.interfaces import IMozLogging
from taar.recommenders.redis_cache import TAARCache

from taar.settings import TAAR_EXPERIMENT_PROB

import markus


metrics = markus.get_metrics("taar")


@@ -60,8 +57,6 @@ def __init__(self, ctx):

self._redis_cache = TAARCache.get_instance(self._ctx)

self._experiment_prob = ctx.get("TAAR_EXPERIMENT_PROB", TAAR_EXPERIMENT_PROB)

@metrics.timer_decorator("profile_recommendation")
def recommend(self, client_id, limit, extra_data={}):
"""Return recommendations for the given client.
@@ -79,8 +74,6 @@ def recommend(self, client_id, limit, extra_data={}):
with log_timer_info("redis read", self.logger):
extra_data["cache"] = self._redis_cache.cache_context()

results = None

if is_test_client(client_id):
# Just create a stub client_info blob
client_info = {
@@ -96,21 +89,14 @@
)
return []

if in_experiment(client_id, self._experiment_prob):
if results is None:
# Fetch back all possible whitelisted addons for this
# client
extra_data["guid_randomization"] = True
whitelist = extra_data["cache"]["whitelist"]
results = self._ensemble_recommender.recommend(
client_info, len(whitelist), extra_data
)
# Fetch back all possible whitelisted addons for this
# client
extra_data["guid_randomization"] = True
whitelist = extra_data["cache"]["whitelist"]
results = self._ensemble_recommender.recommend(
client_info, len(whitelist), extra_data
)

results = reorder_guids(results, limit)
else:
if results is None:
results = self._ensemble_recommender.recommend(
client_info, limit, extra_data
)
results = reorder_guids(results, limit)

return results
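
Paraphrasing the hunk above (a sketch, not code copied from the module; the function name and arguments are assumed): with the experiment gate removed, every non-test client now gets a whitelist-sized ensemble result that is then reordered and truncated to `limit`.

```python
from taar.recommenders.randomizer import reorder_guids

# Sketch of the post-change flow for a non-test client.
def recommend_flow(ensemble_recommender, client_info, limit, extra_data):
    extra_data["guid_randomization"] = True
    whitelist = extra_data["cache"]["whitelist"]
    # Score every whitelisted addon for this client ...
    results = ensemble_recommender.recommend(client_info, len(whitelist), extra_data)
    # ... then randomize by weight and keep only the top `limit` picks.
    return reorder_guids(results, limit)
```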
2 changes: 0 additions & 2 deletions taar/settings.py
@@ -45,8 +45,6 @@
"TAAR_SIMILARITY_LRCURVES_KEY", default="test_similarity_lrcurves_key"
)

TAAR_EXPERIMENT_PROB = config("TAAR_EXPERIMENT_PROB", default=0.0)


# TAAR-lite configuration below

11 changes: 7 additions & 4 deletions tests/test_randomizer.py
@@ -26,20 +26,23 @@ def test_reorder_guids():
np.random.seed(seed=42)

guid_weight_tuples = [
("guid1", 0.01),
("guid0", -0.60),
("guid1", -0.30),
("guid2", 0.09),
("guid3", 0.30),
("guid4", 0.60),
("guid4", 2.5),
]

# Run this 100 times to get the average ordering
results = []
limit = 4
for i in range(100):
results.append(reorder_guids(guid_weight_tuples))
results.append(reorder_guids(guid_weight_tuples, size=limit))

best_result = []
for i in range(4):
for i in range(limit):
best_result.append(most_frequent([row[i] for row in results])[0])

assert best_result == ["guid4", "guid3", "guid2", "guid1"]
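
The `most_frequent` helper used here is defined elsewhere in the test module; a plausible implementation (an assumption, shown only to make the per-position aggregation concrete) would be:

```python
from collections import Counter

def most_frequent(items):
    # (value, count) of the most common element; the test takes [0] to get
    # the guid that wins position i across the 100 randomized runs.
    return Counter(items).most_common(1)[0]
```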


14 changes: 9 additions & 5 deletions tests/test_recommendation_manager.py
Expand Up @@ -17,6 +17,7 @@
import operator
from functools import reduce

import numpy as np
from markus import TIMING
from markus.testing import MetricsMock

@@ -116,19 +117,22 @@ def get(self, client_id):


def test_simple_recommendation(test_ctx):
with mock_install_mock_curated_data(test_ctx):
# Fix the random seed so that we get stable results between test
# runs
np.random.seed(seed=42)

with mock_install_mock_curated_data(test_ctx):
EXPECTED_RESULTS = [
("ghi", 3430.0),
("def", 3320.0),
("ijk", 3200.0),
("klm", 409.99999999999994),
("hij", 3100.0),
("ijk", 3200.0),
("ghi", 3430.0),
("lmn", 420.0),
("klm", 409.99999999999994),
("jkl", 400.0),
("abc", 23.0),
("fgh", 22.0),
("efg", 21.0),
("efg", 21.0)
]

with MetricsMock() as mm:
