From 1802dde2631c7483725403df93d8ac94c38a79e2 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Thu, 20 Feb 2025 19:16:34 +0530 Subject: [PATCH] Add a table to record listen history deletion (#3188) * Add a table to record listen history deletion It is possible for users to delete their entire listen history in one command, instead of individually deleting every listen. To handle this in Spark, record the user_id and the max created value until which listens have been deleted. --- admin/timescale/create_tables.sql | 6 ++++++ .../2025-02-19-add-user-listen-history-delete.sql | 9 +++++++++ listenbrainz/listenstore/dump_listenstore.py | 1 + listenbrainz/listenstore/timescale_listenstore.py | 2 ++ 4 files changed, 18 insertions(+) create mode 100644 admin/timescale/updates/2025-02-19-add-user-listen-history-delete.sql diff --git a/admin/timescale/create_tables.sql b/admin/timescale/create_tables.sql index 4ff5cfb1a2..3c5865d4df 100644 --- a/admin/timescale/create_tables.sql +++ b/admin/timescale/create_tables.sql @@ -28,6 +28,12 @@ CREATE TABLE listen_user_metadata ( SELECT create_hypertable('listen', 'listened_at', chunk_time_interval => INTERVAL '30 days'); +CREATE TABLE deleted_user_listen_history ( + id INTEGER GENERATED ALWAYS AS IDENTITY NOT NULL, + user_id INTEGER NOT NULL, + max_created TIMESTAMP WITH TIME ZONE NOT NULL +); + -- Playlists CREATE TABLE playlist.playlist ( diff --git a/admin/timescale/updates/2025-02-19-add-user-listen-history-delete.sql b/admin/timescale/updates/2025-02-19-add-user-listen-history-delete.sql new file mode 100644 index 0000000000..f1a9b6ebed --- /dev/null +++ b/admin/timescale/updates/2025-02-19-add-user-listen-history-delete.sql @@ -0,0 +1,9 @@ +BEGIN; + +CREATE TABLE deleted_user_listen_history ( + id INTEGER GENERATED ALWAYS AS IDENTITY NOT NULL, + user_id INTEGER NOT NULL, + max_created TIMESTAMP WITH TIME ZONE NOT NULL +); + +COMMIT; diff --git a/listenbrainz/listenstore/dump_listenstore.py 
b/listenbrainz/listenstore/dump_listenstore.py index 924ea84019..ecd8d9f897 100644 --- a/listenbrainz/listenstore/dump_listenstore.py +++ b/listenbrainz/listenstore/dump_listenstore.py @@ -556,5 +556,6 @@ def cleanup_listen_delete_metadata(self): self.log.info("Cleaning up listen_delete_metadata") with timescale.engine.connect() as connection: connection.execute(text("DELETE FROM listen_delete_metadata WHERE status != 'pending'")) + connection.execute(text("DELETE FROM deleted_user_listen_history")) connection.commit() self.log.info("Cleaning up listen_delete_metadata done!") diff --git a/listenbrainz/listenstore/timescale_listenstore.py b/listenbrainz/listenstore/timescale_listenstore.py index 648bf7c97b..3182caf4e2 100644 --- a/listenbrainz/listenstore/timescale_listenstore.py +++ b/listenbrainz/listenstore/timescale_listenstore.py @@ -668,9 +668,11 @@ def delete(self, user_id, created=None): WHERE user_id = :user_id """ query2 = """DELETE FROM listen WHERE user_id = :user_id AND created <= :created""" + query3 = """INSERT INTO deleted_user_listen_history (user_id, max_created) VALUES (:user_id, :created)""" try: ts_conn.execute(sqlalchemy.text(query1), {"user_id": user_id}) ts_conn.execute(sqlalchemy.text(query2), {"user_id": user_id, "created": created}) + ts_conn.execute(sqlalchemy.text(query3), {"user_id": user_id, "created": created}) ts_conn.commit() except psycopg2.OperationalError as e: self.log.error("Cannot delete listens for user: %s" % str(e))