diff --git a/CHANGELOG.md b/CHANGELOG.md
index a9eb3c3c..cd7d4e03 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 - Methods for conversion `Interactions` to raw form and for getting raw interactions from `Dataset` ([#69](https://github.com/MobileTeleSystems/RecTools/pull/69))
+- `AvgRecPopularity (Average Recommendation Popularity)` to `metrics` ([#81](https://github.com/MobileTeleSystems/RecTools/pull/81))
 
 ### Changed
 - Loosened `pandas`, `torch` and `torch-light` versions for `python >= 3.8` ([#58](https://github.com/MobileTeleSystems/RecTools/pull/58))
diff --git a/rectools/metrics/__init__.py b/rectools/metrics/__init__.py
index 16fbb95e..d360558d 100644
--- a/rectools/metrics/__init__.py
+++ b/rectools/metrics/__init__.py
@@ -31,6 +31,7 @@
 `metrics.MRR`
 `metrics.MeanInvUserFreq`
 `metrics.IntraListDiversity`
+`metrics.AvgRecPopularity`
 `metrics.Serendipity`
 
 Tools
@@ -49,6 +50,7 @@
 )
 from .diversity import IntraListDiversity
 from .novelty import MeanInvUserFreq
+from .popularity import AvgRecPopularity
 from .ranking import MAP, MRR, NDCG
 from .scoring import calc_metrics
 from .serendipity import Serendipity
@@ -64,6 +66,7 @@
     "MRR",
     "MeanInvUserFreq",
     "IntraListDiversity",
+    "AvgRecPopularity",
     "Serendipity",
     "calc_metrics",
     "PairwiseDistanceCalculator",
diff --git a/rectools/metrics/popularity.py b/rectools/metrics/popularity.py
new file mode 100644
index 00000000..21b8adfe
--- /dev/null
+++ b/rectools/metrics/popularity.py
@@ -0,0 +1,158 @@
+# Copyright 2024 MTS (Mobile Telesystems)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Popularity metrics."""
+
+import typing as tp
+
+import pandas as pd
+
+from rectools import Columns
+from rectools.metrics.base import MetricAtK
+from rectools.utils import select_by_type
+
+
+class AvgRecPopularity(MetricAtK):
+    r"""
+    Average Recommendation Popularity metric.
+
+    Calculate the average popularity of the recommended items in each list,
+    where "popularity" of an item is the number of previous interactions
+    with this item.
+
+    .. math::
+        ARP@k = \frac{1}{\left|U_{t}\right|}\sum_{u\in U_{t}}\frac{\sum_{i\in L_{u}}\phi(i)}{\left|L_{u}\right|}
+
+    where
+    :math:`\phi(i)` is the number of previous interactions with item ``i``,
+    :math:`|U_{t}|` is the number of users in the test set,
+    :math:`L_{u}` is the list of top-``k`` recommended items for user ``u``.
+
+    Parameters
+    ----------
+    k : int
+        Number of items at the top of recommendations list that will be used to calculate metric.
+
+    Examples
+    --------
+    >>> reco = pd.DataFrame(
+    ...     {
+    ...         Columns.User: [1, 1, 2, 2, 2, 3, 3],
+    ...         Columns.Item: [1, 2, 3, 1, 2, 3, 2],
+    ...         Columns.Rank: [1, 2, 1, 2, 3, 1, 2],
+    ...     }
+    ... )
+    >>> prev_interactions = pd.DataFrame(
+    ...     {
+    ...         Columns.User: [1, 1, 2, 2, 3, 3],
+    ...         Columns.Item: [1, 2, 1, 3, 1, 2],
+    ...     }
+    ... )
+    >>> AvgRecPopularity(k=1).calc_per_user(reco, prev_interactions).values
+    array([3., 1., 1.])
+    >>> AvgRecPopularity(k=3).calc_per_user(reco, prev_interactions).values
+    array([2.5, 2. , 1.5])
+    """
+
+    def calc(self, reco: pd.DataFrame, prev_interactions: pd.DataFrame) -> float:
+        """
+        Calculate metric value.
+
+        Parameters
+        ----------
+        reco : pd.DataFrame
+            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
+        prev_interactions : pd.DataFrame
+            Table with previous user-item interactions,
+            with columns `Columns.User`, `Columns.Item`.
+
+        Returns
+        -------
+        float
+            Value of metric (average between users).
+        """
+        per_user = self.calc_per_user(reco, prev_interactions)
+        return per_user.mean()
+
+    def calc_per_user(
+        self,
+        reco: pd.DataFrame,
+        prev_interactions: pd.DataFrame,
+    ) -> pd.Series:
+        """
+        Calculate metric values for all users.
+
+        Parameters
+        ----------
+        reco : pd.DataFrame
+            Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
+        prev_interactions : pd.DataFrame
+            Table with previous user-item interactions,
+            with columns `Columns.User`, `Columns.Item`.
+
+        Returns
+        -------
+        pd.Series
+            Values of metric (index - user id, values - metric value for every user).
+        """
+        item_popularity = prev_interactions[Columns.Item].value_counts()
+        item_popularity.name = "popularity"
+
+        reco_k = reco.query(f"{Columns.Rank} <= @self.k")
+        reco_prepared = reco_k.join(item_popularity, on=Columns.Item, how="left")
+        reco_prepared["popularity"] = reco_prepared["popularity"].fillna(0)
+
+        arp = reco_prepared.groupby(Columns.User)["popularity"].mean()
+        return arp
+
+
+PopularityMetric = AvgRecPopularity
+
+
+def calc_popularity_metrics(
+    metrics: tp.Dict[str, PopularityMetric],
+    reco: pd.DataFrame,
+    prev_interactions: pd.DataFrame,
+) -> tp.Dict[str, float]:
+    """
+    Calculate popularity metrics (only AvgRecPopularity for now).
+
+    Warning: It is not recommended to use this function directly.
+    Use `calc_metrics` instead.
+
+    Parameters
+    ----------
+    metrics : dict(str -> PopularityMetric)
+        Dict of metric objects to calculate,
+        where key is metric name and value is metric object.
+    reco : pd.DataFrame
+        Recommendations table with columns `Columns.User`, `Columns.Item`, `Columns.Rank`.
+    prev_interactions : pd.DataFrame
+        Table with previous user-item interactions,
+        with columns `Columns.User`, `Columns.Item`.
+
+    Returns
+    -------
+    dict(str->float)
+        Dictionary where keys are the same as keys in `metrics`
+        and values are metric calculation results.
+    """
+    results = {}
+
+    # ARP
+    pop_metrics: tp.Dict[str, AvgRecPopularity] = select_by_type(metrics, AvgRecPopularity)
+    for name, metric in pop_metrics.items():
+        results[name] = metric.calc(reco, prev_interactions)
+
+    return results
diff --git a/rectools/metrics/scoring.py b/rectools/metrics/scoring.py
index 26de47c2..9649b4ee 100644
--- a/rectools/metrics/scoring.py
+++ b/rectools/metrics/scoring.py
@@ -25,6 +25,7 @@
 from .classification import ClassificationMetric, SimpleClassificationMetric, calc_classification_metrics
 from .diversity import DiversityMetric, calc_diversity_metrics
 from .novelty import NoveltyMetric, calc_novelty_metrics
+from .popularity import PopularityMetric, calc_popularity_metrics
 from .ranking import RankingMetric, calc_ranking_metrics
 from .serendipity import SerendipityMetric, calc_serendipity_metrics
 
@@ -131,6 +132,14 @@ def calc_metrics(  # noqa  # pylint: disable=too-many-branches
         novelty_values = calc_novelty_metrics(novelty_metrics, reco, prev_interactions)
         results.update(novelty_values)
 
+    # Popularity
+    popularity_metrics = select_by_type(metrics, PopularityMetric)
+    if popularity_metrics:
+        if prev_interactions is None:
+            raise ValueError("For calculating popularity metrics it's necessary to set 'prev_interactions'")
+        popularity_values = calc_popularity_metrics(popularity_metrics, reco, prev_interactions)
+        results.update(popularity_values)
+
     # Diversity
     diversity_metrics = select_by_type(metrics, DiversityMetric)
     if diversity_metrics:
diff --git a/tests/metrics/test_popularity.py b/tests/metrics/test_popularity.py
new file mode 100644
index 00000000..ff99ea5f
--- /dev/null
+++ b/tests/metrics/test_popularity.py
@@ -0,0 +1,106 @@
+# Copyright 2022 MTS (Mobile Telesystems)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from rectools import Columns
+from rectools.metrics.popularity import AvgRecPopularity
+
+
+class TestAvgRecPopularity:
+    @pytest.fixture
+    def interactions(self) -> pd.DataFrame:
+        interactions = pd.DataFrame(
+            [["u1", "i1"], ["u1", "i2"], ["u2", "i1"], ["u2", "i3"], ["u3", "i1"], ["u3", "i2"]],
+            columns=[Columns.User, Columns.Item],
+        )
+        return interactions
+
+    @pytest.fixture
+    def recommendations(self) -> pd.DataFrame:
+        recommendations = pd.DataFrame(
+            [
+                ["u1", "i1", 1],
+                ["u1", "i2", 2],
+                ["u2", "i3", 1],
+                ["u2", "i1", 2],
+                ["u2", "i2", 3],
+                ["u3", "i3", 1],
+                ["u3", "i2", 2],
+            ],
+            columns=[Columns.User, Columns.Item, Columns.Rank],
+        )
+        return recommendations
+
+    @pytest.mark.parametrize(
+        "k,expected",
+        (
+            (1, pd.Series(index=["u1", "u2", "u3"], data=[3.0, 1.0, 1.0])),
+            (3, pd.Series(index=["u1", "u2", "u3"], data=[2.5, 2.0, 1.5])),
+        ),
+    )
+    def test_correct_arp_values(
+        self, recommendations: pd.DataFrame, interactions: pd.DataFrame, k: int, expected: pd.Series
+    ) -> None:
+        arp = AvgRecPopularity(k)
+
+        actual = arp.calc_per_user(recommendations, interactions)
+        pd.testing.assert_series_equal(actual, expected, check_names=False)
+
+        actual_mean = arp.calc(recommendations, interactions)
+        assert actual_mean == expected.mean()
+
+    def test_when_no_interactions(
+        self,
+        recommendations: pd.DataFrame,
+    ) -> None:
+        expected = pd.Series(index=recommendations[Columns.User].unique(), data=[0.0, 0.0, 0.0])
+        empty_interactions = pd.DataFrame(columns=[Columns.User, Columns.Item], dtype=int)
+        arp = AvgRecPopularity(k=2)
+
+        actual = arp.calc_per_user(recommendations, empty_interactions)
+        pd.testing.assert_series_equal(actual, expected, check_names=False)
+
+        actual_mean = arp.calc(recommendations, empty_interactions)
+        assert actual_mean == expected.mean()
+
+    @pytest.mark.parametrize(
+        "k,expected",
+        (
+            (1, pd.Series(index=["u1", "u2", "u3"], data=[3.0, 1.0, 1.0])),
+            (3, pd.Series(index=["u1", "u2", "u3"], data=[2.5, np.divide(4, 3), 1.5])),
+        ),
+    )
+    def test_when_new_item_in_reco(self, interactions: pd.DataFrame, k: int, expected: pd.Series) -> None:
+        reco = pd.DataFrame(
+            [
+                ["u1", "i1", 1],
+                ["u1", "i2", 2],
+                ["u2", "i3", 1],
+                ["u2", "i1", 2],
+                ["u2", "i4", 3],
+                ["u3", "i3", 1],
+                ["u3", "i2", 2],
+            ],
+            columns=[Columns.User, Columns.Item, Columns.Rank],
+        )
+        arp = AvgRecPopularity(k)
+
+        actual = arp.calc_per_user(reco, interactions)
+        pd.testing.assert_series_equal(actual, expected, check_names=False)
+
+        actual_mean = arp.calc(reco, interactions)
+        assert actual_mean == expected.mean()
diff --git a/tests/metrics/test_scoring.py b/tests/metrics/test_scoring.py
index d95c64eb..40cc71f5 100644
--- a/tests/metrics/test_scoring.py
+++ b/tests/metrics/test_scoring.py
@@ -23,6 +23,7 @@
     MRR,
     NDCG,
     Accuracy,
+    AvgRecPopularity,
     IntraListDiversity,
     MeanInvUserFreq,
     PairwiseHammingDistanceCalculator,
@@ -76,6 +77,7 @@ def test_success(self) -> None:
             "ndcg@1": NDCG(k=1, log_base=3),
             "mrr@1": MRR(k=1),
             "miuf": MeanInvUserFreq(k=3),
+            "arp": AvgRecPopularity(k=2),
             "ild": IntraListDiversity(k=3, distance_calculator=self.calculator),
             "serendipity": Serendipity(k=3),
             "custom": MetricAtK(k=1),
@@ -92,6 +94,7 @@ def test_success(self) -> None:
             "ndcg@1": 0.25,
             "mrr@1": 0.25,
             "miuf": 0.125,
+            "arp": 2.75,
             "ild": 0.25,
             "serendipity": 0,
         }
@@ -103,6 +106,7 @@
            (Precision(k=1), ["reco"]),
            (MAP(k=1), ["reco"]),
            (MeanInvUserFreq(k=1), ["reco"]),
+           (AvgRecPopularity(k=1), ["reco"]),
            (Serendipity(k=1), ["reco"]),
            (Serendipity(k=1), ["reco", "interactions"]),
            (Serendipity(k=1), ["reco", "interactions", "prev_interactions"]),
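
Not part of the diff above: a minimal usage sketch of the new metric, reusing the toy data from the `AvgRecPopularity` docstring. The `calc_metrics` call relies on the `prev_interactions` argument wired up in `scoring.py` in this diff; the metric name "arp@2" and the printed values are illustrative only.

    import pandas as pd

    from rectools import Columns
    from rectools.metrics import AvgRecPopularity, calc_metrics

    # Toy data taken from the AvgRecPopularity docstring example in this diff
    reco = pd.DataFrame(
        {
            Columns.User: [1, 1, 2, 2, 2, 3, 3],
            Columns.Item: [1, 2, 3, 1, 2, 3, 2],
            Columns.Rank: [1, 2, 1, 2, 3, 1, 2],
        }
    )
    prev_interactions = pd.DataFrame(
        {
            Columns.User: [1, 1, 2, 2, 3, 3],
            Columns.Item: [1, 2, 1, 3, 1, 2],
        }
    )

    # Direct use: per-user ARP@2 is (3+2)/2, (1+3)/2, (1+2)/2 -> 2.5, 2.0, 1.5
    arp = AvgRecPopularity(k=2)
    print(arp.calc_per_user(reco, prev_interactions))
    print(arp.calc(reco, prev_interactions))  # mean over users -> 2.0

    # Via calc_metrics, as enabled by the scoring.py change above;
    # "arp@2" is just a dictionary key chosen for this sketch
    print(calc_metrics({"arp@2": arp}, reco, prev_interactions=prev_interactions))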