diff --git a/admin/timescale/create_tables.sql b/admin/timescale/create_tables.sql index 987a6412f2..4ff5cfb1a2 100644 --- a/admin/timescale/create_tables.sql +++ b/admin/timescale/create_tables.sql @@ -13,9 +13,9 @@ CREATE TABLE listen_delete_metadata ( user_id INTEGER NOT NULL, listened_at TIMESTAMP WITH TIME ZONE NOT NULL, recording_msid UUID NOT NULL, - deleted BOOLEAN NOT NULL DEFAULT FALSE, + status listen_delete_metadata_status_enum NOT NULL DEFAULT 'pending', listen_created TIMESTAMP WITH TIME ZONE - CHECK ( deleted IS FALSE OR (deleted IS TRUE AND listen_created IS NOT NULL) ) + CHECK ( status = 'invalid' OR status = 'pending' OR (status = 'complete' AND listen_created IS NOT NULL) ) ); CREATE TABLE listen_user_metadata ( diff --git a/admin/timescale/create_types.sql b/admin/timescale/create_types.sql index 806e3a21ec..9839f242dd 100644 --- a/admin/timescale/create_types.sql +++ b/admin/timescale/create_types.sql @@ -2,5 +2,6 @@ BEGIN; CREATE TYPE mbid_mapping_match_type_enum AS ENUM('no_match', 'low_quality', 'med_quality', 'high_quality', 'exact_match'); CREATE TYPE lb_tag_radio_source_type_enum AS ENUM ('recording', 'artist', 'release-group'); +CREATE TYPE listen_delete_metadata_status_enum AS ENUM ('pending', 'invalid', 'complete'); COMMIT; diff --git a/admin/timescale/updates/2025-02-19-change-listen-delete-metadata-status.sql b/admin/timescale/updates/2025-02-19-change-listen-delete-metadata-status.sql new file mode 100644 index 0000000000..1e16bb13b6 --- /dev/null +++ b/admin/timescale/updates/2025-02-19-change-listen-delete-metadata-status.sql @@ -0,0 +1,15 @@ +CREATE TYPE listen_delete_metadata_status_enum AS ENUM ('pending', 'invalid', 'complete'); + +BEGIN; + +ALTER TABLE listen_delete_metadata ADD COLUMN status listen_delete_metadata_status_enum NOT NULL DEFAULT 'pending'; +ALTER TABLE listen_delete_metadata + ADD CONSTRAINT listen_delete_metadata_status_created_constraint + CHECK ( status = 'invalid' OR status = 'pending' OR (status = 'complete' AND listen_created IS NOT NULL) ); + +UPDATE listen_delete_metadata SET status = CASE WHEN deleted IS TRUE 'complete' ELSE 'pending' END; + +ALTER TABLE listen_delete_metadata DROP CONSTRAINT listen_delete_metadata_deleted_created_constraint; +ALTER TABLE listen_delete_metadata DROP COLUMN deleted; + +COMMIT; diff --git a/listenbrainz/listenstore/dump_listenstore.py b/listenbrainz/listenstore/dump_listenstore.py index 5217126bae..924ea84019 100644 --- a/listenbrainz/listenstore/dump_listenstore.py +++ b/listenbrainz/listenstore/dump_listenstore.py @@ -555,6 +555,6 @@ def cleanup_listen_delete_metadata(self): """ Cleanup listen delete metadata after spark full dump is complete """ self.log.info("Cleaning up listen_delete_metadata") with timescale.engine.connect() as connection: - connection.execute(text("DELETE FROM listen_delete_metadata WHERE deleted")) + connection.execute(text("DELETE FROM listen_delete_metadata WHERE status != 'pending'")) connection.commit() self.log.info("Cleaning up listen_delete_metadata done!") diff --git a/listenbrainz/listenstore/timescale_utils.py b/listenbrainz/listenstore/timescale_utils.py index b6fa261e3b..1e9cd81e4d 100644 --- a/listenbrainz/listenstore/timescale_utils.py +++ b/listenbrainz/listenstore/timescale_utils.py @@ -106,7 +106,7 @@ def delete_listens(): AND l.user_id = ldm.user_id AND l.listened_at = ldm.listened_at AND l.recording_msid = ldm.recording_msid - AND NOT ldm.deleted + AND ldm.status = 'pending' RETURNING ldm.id, l.user_id, l.created ), update_counts AS ( UPDATE listen_user_metadata lm @@ -120,7 +120,7 @@ def delete_listens(): WHERE lm.user_id = uc.user_id ) UPDATE listen_delete_metadata ldm - SET deleted = 't' + SET status = 'complete' , listen_created = dl.created FROM deleted_listens dl WHERE ldm.id = dl.id @@ -193,6 +193,12 @@ def delete_listens(): FROM calculate_new_ts mt WHERE lm.user_id = mt.user_id """ + mark_invalid_rows_query = """ + UPDATE listen_delete_metadata + SET status = 'invalid' + WHERE id <= :max_id + AND status = 'pending' + """ with timescale.engine.begin() as connection: result = connection.execute(text(select_max_id)) @@ -214,6 +220,9 @@ def delete_listens(): logger.info("Update maximum listen timestamp affected by deleted listens") connection.execute(text(update_listen_max_ts), {"max_id": max_id}) + logger.info("Cleanup listen delete metadata table") + connection.execute(text(mark_invalid_rows_query), {"max_id": max_id}) + logger.info("Completed deleting listens and updating affected metadata")