Skip to content

Commit

Permalink
Refactor listen deletion status column (#3187)
Browse files Browse the repository at this point in the history
* Refactor listen deletion status column

Change the boolean deleted column of listen_delete_metadata to a status column,
which is an enum supporting the pending, invalid and complete states. This helps
mark duplicate listen delete requests and non-existent listen delete requests
as invalid, and allows their subsequent cleanup after dumps finish. It is difficult
to distinguish between a new listen deletion request and an invalid listen
deletion request in the absence of an invalid state.
  • Loading branch information
amCap1712 authored Feb 19, 2025
1 parent 4bdac3c commit 720968a
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 5 deletions.
4 changes: 2 additions & 2 deletions admin/timescale/create_tables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ CREATE TABLE listen_delete_metadata (
user_id INTEGER NOT NULL,
listened_at TIMESTAMP WITH TIME ZONE NOT NULL,
recording_msid UUID NOT NULL,
deleted BOOLEAN NOT NULL DEFAULT FALSE,
status listen_delete_metadata_status_enum NOT NULL DEFAULT 'pending',
listen_created TIMESTAMP WITH TIME ZONE
CHECK ( deleted IS FALSE OR (deleted IS TRUE AND listen_created IS NOT NULL) )
CHECK ( status = 'invalid' OR status = 'pending' OR (status = 'complete' AND listen_created IS NOT NULL) )
);

CREATE TABLE listen_user_metadata (
Expand Down
1 change: 1 addition & 0 deletions admin/timescale/create_types.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@ BEGIN;

CREATE TYPE mbid_mapping_match_type_enum AS ENUM('no_match', 'low_quality', 'med_quality', 'high_quality', 'exact_match');
CREATE TYPE lb_tag_radio_source_type_enum AS ENUM ('recording', 'artist', 'release-group');
CREATE TYPE listen_delete_metadata_status_enum AS ENUM ('pending', 'invalid', 'complete');

COMMIT;
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- Migration: replace the boolean `deleted` column of listen_delete_metadata
-- with a three-state `status` enum column (pending / invalid / complete).

CREATE TYPE listen_delete_metadata_status_enum AS ENUM ('pending', 'invalid', 'complete');

BEGIN;

-- Every existing row starts out as 'pending' through the column default.
ALTER TABLE listen_delete_metadata ADD COLUMN status listen_delete_metadata_status_enum NOT NULL DEFAULT 'pending';

-- A 'complete' row must record when the deleted listen was created;
-- 'pending' and 'invalid' rows carry no such requirement.
ALTER TABLE listen_delete_metadata
    ADD CONSTRAINT listen_delete_metadata_status_created_constraint
    CHECK ( status = 'invalid' OR status = 'pending' OR (status = 'complete' AND listen_created IS NOT NULL) );

-- Carry the old boolean over: deleted rows become 'complete', all others stay 'pending'.
UPDATE listen_delete_metadata SET status = CASE WHEN deleted IS TRUE THEN 'complete' ELSE 'pending' END;

-- Remove the old constraint and column now that `status` holds the same information.
ALTER TABLE listen_delete_metadata DROP CONSTRAINT listen_delete_metadata_deleted_created_constraint;
ALTER TABLE listen_delete_metadata DROP COLUMN deleted;

COMMIT;
2 changes: 1 addition & 1 deletion listenbrainz/listenstore/dump_listenstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,6 @@ def cleanup_listen_delete_metadata(self):
""" Cleanup listen delete metadata after spark full dump is complete """
self.log.info("Cleaning up listen_delete_metadata")
with timescale.engine.connect() as connection:
connection.execute(text("DELETE FROM listen_delete_metadata WHERE deleted"))
connection.execute(text("DELETE FROM listen_delete_metadata WHERE status != 'pending'"))
connection.commit()
self.log.info("Cleaning up listen_delete_metadata done!")
13 changes: 11 additions & 2 deletions listenbrainz/listenstore/timescale_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def delete_listens():
AND l.user_id = ldm.user_id
AND l.listened_at = ldm.listened_at
AND l.recording_msid = ldm.recording_msid
AND NOT ldm.deleted
AND ldm.status = 'pending'
RETURNING ldm.id, l.user_id, l.created
), update_counts AS (
UPDATE listen_user_metadata lm
Expand All @@ -120,7 +120,7 @@ def delete_listens():
WHERE lm.user_id = uc.user_id
)
UPDATE listen_delete_metadata ldm
SET deleted = 't'
SET status = 'complete'
, listen_created = dl.created
FROM deleted_listens dl
WHERE ldm.id = dl.id
Expand Down Expand Up @@ -193,6 +193,12 @@ def delete_listens():
FROM calculate_new_ts mt
WHERE lm.user_id = mt.user_id
"""
mark_invalid_rows_query = """
UPDATE listen_delete_metadata
SET status = 'invalid'
WHERE id <= :max_id
AND status = 'pending'
"""

with timescale.engine.begin() as connection:
result = connection.execute(text(select_max_id))
Expand All @@ -214,6 +220,9 @@ def delete_listens():
logger.info("Update maximum listen timestamp affected by deleted listens")
connection.execute(text(update_listen_max_ts), {"max_id": max_id})

logger.info("Cleanup listen delete metadata table")
connection.execute(text(mark_invalid_rows_query), {"max_id": max_id})

logger.info("Completed deleting listens and updating affected metadata")


Expand Down

0 comments on commit 720968a

Please sign in to comment.