Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Partition the version_downloads table #2203

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions diesel.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,23 @@ file = "src/schema.rs"
with_docs = true
import_types = ["diesel::sql_types::*", "diesel_full_text_search::{TsVector as Tsvector}"]
patch_file = "src/schema.patch"

# Tables that `diesel print-schema` must leave out of the generated
# `src/schema.rs`: the archived copy of the old table and every concrete
# range partition of `version_downloads`.
#
# `version_downloads_default` is deliberately NOT listed here: the monitor
# binary counts rows in it through the generated schema module, so it has to
# stay in `src/schema.rs`.
#
# NOTE(review): this list presumably needs a new entry whenever another
# partition is created -- confirm against the partition-generation task.
[print_schema.filter]
except_tables = [
"version_downloads_archive",
"version_downloads_pre_2017",
"version_downloads_2017",
"version_downloads_2018_q1",
"version_downloads_2018_q2",
"version_downloads_2018_q3",
"version_downloads_2018_q4",
"version_downloads_2019_q1",
"version_downloads_2019_q2",
"version_downloads_2019_q3",
"version_downloads_2019_q4",
"version_downloads_2020_q1",
"version_downloads_2020_q2",
"version_downloads_2020_q3",
"version_downloads_2020_q4",
"version_downloads_2021_q1",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Reverts the matching up.sql in reverse order of creation.
--
-- The trigger is dropped explicitly rather than via `DROP FUNCTION ... CASCADE`
-- so that only the object this migration created is removed. If anything else
-- has come to depend on the function, the DROP FUNCTION below fails loudly
-- instead of silently taking that object with it.
DROP TRIGGER update_partitioned_version_downloads_trigger ON version_downloads;
DROP FUNCTION update_partitioned_version_downloads();

-- Dropping the partitioned parent also drops all of its partitions.
DROP TABLE version_downloads_part;
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
-- Partitioned table holding one row per (version, day), range-partitioned on
-- `date`. The primary key includes `date` because a unique constraint on a
-- partitioned table must contain the partition key.
CREATE TABLE version_downloads_part (
    version_id INTEGER NOT NULL,
    downloads  INTEGER NOT NULL DEFAULT 1,
    counted    INTEGER NOT NULL DEFAULT 0,
    date       DATE    NOT NULL DEFAULT CURRENT_DATE,
    PRIMARY KEY (version_id, date),
    -- Download rows disappear together with the version they belong to.
    FOREIGN KEY (version_id) REFERENCES versions (id) ON DELETE CASCADE
)
PARTITION BY RANGE (date);

-- Catch-all partition: receives every row whose `date` is not covered by one
-- of the explicit range partitions of `version_downloads_part`.
CREATE TABLE version_downloads_default PARTITION OF version_downloads_part DEFAULT;

COMMENT ON TABLE version_downloads_default IS
'This table should always be empty. We partition by quarter (or perhaps
more frequently in the future), and we create the partitions a year in
advance. If data ends up here, something has gone wrong with partition
creation. This table exists so we don''t lose data if that happens, and
so we have a way to detect this happening programmatically.';

-- Range partitions of `version_downloads_part`, keyed on `date`.
--
-- Layout: one open-ended partition for everything before 2017, one yearly
-- partition for 2017, then one partition per calendar quarter from 2018 Q1
-- through 2021 Q1. Range bounds are half-open: the FROM value is inclusive
-- and the TO value is exclusive, so adjacent partitions share a boundary date
-- without overlapping.
--
-- Every table created here is also listed under `except_tables` in
-- diesel.toml so that it is kept out of the generated Rust schema.

-- Everything older than 2017 lands in a single partition.
CREATE TABLE version_downloads_pre_2017 PARTITION OF version_downloads_part
FOR VALUES FROM (MINVALUE) TO ('2017-01-01');

-- 2017 is kept as one yearly partition.
CREATE TABLE version_downloads_2017 PARTITION OF version_downloads_part
FOR VALUES FROM ('2017-01-01') TO ('2018-01-01');

-- 2018, by quarter.
CREATE TABLE version_downloads_2018_q1 PARTITION OF version_downloads_part
FOR VALUES FROM ('2018-01-01') TO ('2018-04-01');

CREATE TABLE version_downloads_2018_q2 PARTITION OF version_downloads_part
FOR VALUES FROM ('2018-04-01') TO ('2018-07-01');

CREATE TABLE version_downloads_2018_q3 PARTITION OF version_downloads_part
FOR VALUES FROM ('2018-07-01') TO ('2018-10-01');

CREATE TABLE version_downloads_2018_q4 PARTITION OF version_downloads_part
FOR VALUES FROM ('2018-10-01') TO ('2019-01-01');

-- 2019, by quarter.
CREATE TABLE version_downloads_2019_q1 PARTITION OF version_downloads_part
FOR VALUES FROM ('2019-01-01') TO ('2019-04-01');

CREATE TABLE version_downloads_2019_q2 PARTITION OF version_downloads_part
FOR VALUES FROM ('2019-04-01') TO ('2019-07-01');

CREATE TABLE version_downloads_2019_q3 PARTITION OF version_downloads_part
FOR VALUES FROM ('2019-07-01') TO ('2019-10-01');

CREATE TABLE version_downloads_2019_q4 PARTITION OF version_downloads_part
FOR VALUES FROM ('2019-10-01') TO ('2020-01-01');

-- 2020, by quarter.
CREATE TABLE version_downloads_2020_q1 PARTITION OF version_downloads_part
FOR VALUES FROM ('2020-01-01') TO ('2020-04-01');

CREATE TABLE version_downloads_2020_q2 PARTITION OF version_downloads_part
FOR VALUES FROM ('2020-04-01') TO ('2020-07-01');

CREATE TABLE version_downloads_2020_q3 PARTITION OF version_downloads_part
FOR VALUES FROM ('2020-07-01') TO ('2020-10-01');

CREATE TABLE version_downloads_2020_q4 PARTITION OF version_downloads_part
FOR VALUES FROM ('2020-10-01') TO ('2021-01-01');

-- Last partition created by this migration; dates on or after 2021-04-01
-- fall through to `version_downloads_default` until more partitions exist.
CREATE TABLE version_downloads_2021_q1 PARTITION OF version_downloads_part
FOR VALUES FROM ('2021-01-01') TO ('2021-04-01');

-- Trigger function that copies the row being written into
-- `version_downloads_part`.
--
-- For each affected row whose new image differs from its old image, the
-- (version_id, downloads, counted, date) values of NEW are upserted into
-- `version_downloads_part`, overwriting `downloads` and `counted` when a row
-- with the same (version_id, date) already exists.
--
-- Always returns NULL; the return value of a row-level AFTER trigger is
-- ignored.
--
-- NOTE(review): the whole-row comparison also fires when only a column that
-- is not copied has changed, which then rewrites identical values -- confirm
-- this is acceptable. DELETEs and changes to (version_id, date) are not
-- propagated by this function.
CREATE FUNCTION update_partitioned_version_downloads() RETURNS TRIGGER AS $$
BEGIN
    -- Unchanged row (for example a no-op UPDATE): nothing to copy.
    IF NEW IS NOT DISTINCT FROM OLD THEN
        RETURN NULL;
    END IF;

    INSERT INTO version_downloads_part (version_id, downloads, counted, date)
    VALUES (NEW.version_id, NEW.downloads, NEW.counted, NEW.date)
    ON CONFLICT (version_id, date) DO UPDATE
        SET (downloads, counted) = (EXCLUDED.downloads, EXCLUDED.counted);

    RETURN NULL;
END;
$$ LANGUAGE plpgsql;

-- Run `update_partitioned_version_downloads()` once per row after every
-- INSERT or UPDATE on `version_downloads`, so that writes to that table are
-- copied into `version_downloads_part`. DELETE is not covered by this trigger.
CREATE TRIGGER update_partitioned_version_downloads_trigger
AFTER INSERT OR UPDATE ON version_downloads
FOR EACH ROW EXECUTE FUNCTION update_partitioned_version_downloads();
10 changes: 10 additions & 0 deletions migrations/2020-02-19-001642_swap_version_downloads/down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Undo the table swap: the partitioned table goes back to being
-- `version_downloads_part` and the archived table becomes
-- `version_downloads` again.
--
-- The materialized view is dropped first and rebuilt afterwards so that it is
-- defined against whichever table carries the name `version_downloads` once
-- the renames are done.
DROP MATERIALIZED VIEW recent_crate_downloads;

ALTER TABLE version_downloads RENAME TO version_downloads_part;
ALTER TABLE version_downloads_archive RENAME TO version_downloads;

-- Total downloads per crate over the trailing 90 days.
CREATE MATERIALIZED VIEW recent_crate_downloads (crate_id, downloads) AS
    SELECT
        v.crate_id,
        SUM(vd.downloads)
    FROM version_downloads AS vd
    INNER JOIN versions AS v
        ON vd.version_id = v.id
    WHERE vd.date > date(CURRENT_TIMESTAMP - INTERVAL '90 days')
    GROUP BY v.crate_id;

CREATE UNIQUE INDEX recent_crate_downloads_crate_id ON recent_crate_downloads (crate_id);
10 changes: 10 additions & 0 deletions migrations/2020-02-19-001642_swap_version_downloads/up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Swap the tables: the existing `version_downloads` is kept as
-- `version_downloads_archive`, and the partitioned `version_downloads_part`
-- takes over the name `version_downloads`.
--
-- The materialized view is dropped first and rebuilt afterwards so that it is
-- defined against the table that carries the name `version_downloads` once
-- the renames are done.
--
-- NOTE(review): the row trigger created alongside `version_downloads_part`
-- is not touched here, so it presumably stays attached to the archived table
-- while its function still inserts into `version_downloads_part` by name --
-- confirm nothing writes to the archive after this migration.
DROP MATERIALIZED VIEW recent_crate_downloads;

ALTER TABLE version_downloads RENAME TO version_downloads_archive;
ALTER TABLE version_downloads_part RENAME TO version_downloads;

-- Total downloads per crate over the trailing 90 days.
CREATE MATERIALIZED VIEW recent_crate_downloads (crate_id, downloads) AS
    SELECT
        v.crate_id,
        SUM(vd.downloads)
    FROM version_downloads AS vd
    INNER JOIN versions AS v
        ON vd.version_id = v.id
    WHERE vd.date > date(CURRENT_TIMESTAMP - INTERVAL '90 days')
    GROUP BY v.crate_id;

CREATE UNIQUE INDEX recent_crate_downloads_crate_id ON recent_crate_downloads (crate_id);
3 changes: 3 additions & 0 deletions src/bin/enqueue-job.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ fn main() -> Result<(), Error> {
println!("Enqueueing background job: {}", job);

match &*job {
"generate_version_downloads_partition" => {
Ok(tasks::generate_version_downloads_partition().enqueue(&conn)?)
}
"update_downloads" => Ok(tasks::update_downloads().enqueue(&conn)?),
"dump_db" => {
let database_url = args.next().unwrap_or_else(|| env("READ_ONLY_REPLICA_URL"));
Expand Down
29 changes: 29 additions & 0 deletions src/bin/monitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ fn main() -> Result<(), Error> {

check_stalled_background_jobs(&conn)?;
check_spam_attack(&conn)?;
check_default_version_downloads_partition(&conn)?;
Ok(())
}

Expand Down Expand Up @@ -116,6 +117,34 @@ fn check_spam_attack(conn: &PgConnection) -> Result<(), Error> {
Ok(())
}

/// Raises or resolves an on-call incident depending on whether the
/// `version_downloads_default` table contains any rows.
///
/// Counts the rows in `version_downloads_default`. A positive count produces
/// a `Trigger` event whose description reports the count; otherwise a
/// `Resolve` event is produced. Both use the same incident key, and the event
/// is handed to `log_and_trigger_event`.
///
/// Returns an error if the count query fails or if `log_and_trigger_event`
/// does.
fn check_default_version_downloads_partition(conn: &PgConnection) -> Result<(), Error> {
    use cargo_registry::schema::version_downloads_default::dsl::*;

    const EVENT_KEY: &str = "version_downloads_missing_partition";

    println!("Checking for data in the default `version_downloads` partition");
    let stray_row_count: i64 = version_downloads_default.count().get_result(conn)?;

    let event = match stray_row_count {
        n if n > 0 => on_call::Event::Trigger {
            incident_key: Some(EVENT_KEY.into()),
            description: format!(
                "{} rows exist in the default `version_downloads` partition",
                n
            ),
        },
        _ => on_call::Event::Resolve {
            incident_key: EVENT_KEY.into(),
            description: Some("No records in default `version_downloads` partition".into()),
        },
    };

    log_and_trigger_event(event)
}

fn log_and_trigger_event(event: on_call::Event) -> Result<(), Error> {
match event {
on_call::Event::Trigger {
Expand Down
1 change: 0 additions & 1 deletion src/models/download.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ pub struct VersionDownload {
pub downloads: i32,
pub counted: i32,
pub date: NaiveDate,
pub processed: bool,
}

impl VersionDownload {
Expand Down
46 changes: 12 additions & 34 deletions src/schema.patch
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
diff --git a/src/schema.rs b/src/schema.rs
index df884e4..18e08cd 100644
index 9d6ab43..a81161a 100644
--- a/src/schema.rs
+++ b/src/schema.rs
@@ -1,5 +1,7 @@
@@ -1,3 +1,5 @@
+#![allow(unused_imports)]
+
table! {
use diesel::sql_types::*;
use diesel_full_text_search::{TsVector as Tsvector};

/// Representation of the `api_tokens` table.
@@ -125,14 +125,8 @@ table! {
/// Its SQL type is `Timestamp`.
@@ -171,12 +173,6 @@ table! {
///
/// (Automatically generated by Diesel.)
created_at -> Timestamp,
Expand All @@ -23,12 +20,8 @@ index df884e4..18e08cd 100644
- path -> Ltree,
}
}

table! {
@@ -608,11 +610,29 @@ table! {
/// (Automatically generated by Diesel.)
rendered_at -> Timestamp,
}

@@ -678,6 +674,24 @@ table! {
}

table! {
Expand All @@ -53,11 +46,7 @@ index df884e4..18e08cd 100644
use diesel::sql_types::*;
use diesel_full_text_search::{TsVector as Tsvector};

/// Representation of the `reserved_crate_names` table.
///
@@ -881,23 +901,25 @@ table! {

joinable!(api_tokens -> users (user_id));
@@ -1003,7 +1017,8 @@ joinable!(api_tokens -> users (user_id));
joinable!(badges -> crates (crate_id));
joinable!(crate_owner_invitations -> crates (crate_id));
joinable!(crate_owners -> crates (crate_id));
Expand All @@ -67,33 +56,22 @@ index df884e4..18e08cd 100644
joinable!(crates_categories -> categories (category_id));
joinable!(crates_categories -> crates (crate_id));
joinable!(crates_keywords -> crates (crate_id));
joinable!(crates_keywords -> keywords (keyword_id));
joinable!(dependencies -> crates (crate_id));
joinable!(dependencies -> versions (version_id));
joinable!(emails -> users (user_id));
joinable!(follows -> crates (crate_id));
joinable!(follows -> users (user_id));
@@ -1016,8 +1031,10 @@ joinable!(follows -> users (user_id));
joinable!(publish_limit_buckets -> users (user_id));
joinable!(publish_rate_overrides -> users (user_id));
joinable!(readme_renderings -> versions (version_id));
+joinable!(recent_crate_downloads -> crates (crate_id));
joinable!(version_authors -> users (user_id));
joinable!(version_authors -> versions (version_id));
joinable!(version_downloads -> versions (version_id));
joinable!(version_owner_actions -> api_tokens (owner_token_id));

@@ -913,13 +935,14 @@ allow_tables_to_appear_in_same_query!(
emails,
follows,
keywords,
metadata,
+joinable!(version_downloads -> versions (version_id));
joinable!(version_owner_actions -> api_tokens (api_token_id));
joinable!(version_owner_actions -> users (user_id));
joinable!(version_owner_actions -> versions (version_id));
@@ -1043,6 +1060,7 @@ allow_tables_to_appear_in_same_query!(
publish_limit_buckets,
publish_rate_overrides,
readme_renderings,
+ recent_crate_downloads,
reserved_crate_names,
teams,
users,
version_authors,
version_downloads,
versions,
36 changes: 33 additions & 3 deletions src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -863,12 +863,41 @@ table! {
///
/// (Automatically generated by Diesel.)
date -> Date,
/// The `processed` column of the `version_downloads` table.
}
}

table! {
use diesel::sql_types::*;
use diesel_full_text_search::{TsVector as Tsvector};

/// Representation of the `version_downloads_default` table.
///
/// (Automatically generated by Diesel.)
version_downloads_default (version_id, date) {
/// The `version_id` column of the `version_downloads_default` table.
///
/// Its SQL type is `Bool`.
/// Its SQL type is `Int4`.
///
/// (Automatically generated by Diesel.)
processed -> Bool,
version_id -> Int4,
/// The `downloads` column of the `version_downloads_default` table.
///
/// Its SQL type is `Int4`.
///
/// (Automatically generated by Diesel.)
downloads -> Int4,
/// The `counted` column of the `version_downloads_default` table.
///
/// Its SQL type is `Int4`.
///
/// (Automatically generated by Diesel.)
counted -> Int4,
/// The `date` column of the `version_downloads_default` table.
///
/// Its SQL type is `Date`.
///
/// (Automatically generated by Diesel.)
date -> Date,
}
}

Expand Down Expand Up @@ -1072,6 +1101,7 @@ allow_tables_to_appear_in_same_query!(
users,
version_authors,
version_downloads,
version_downloads_default,
version_owner_actions,
versions,
versions_published_by,
Expand Down
4 changes: 4 additions & 0 deletions src/tasks.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
pub mod dump_db;
mod generate_version_downloads_partition;
#[cfg(test)]
mod test_helpers;
mod update_downloads;

pub use dump_db::dump_db;
pub use generate_version_downloads_partition::generate_version_downloads_partition;
pub use update_downloads::update_downloads;
1 change: 1 addition & 0 deletions src/tasks/dump_db/gen_scripts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ mod tests {
/// Test whether the visibility configuration matches the schema of the
/// test database.
#[test]
#[should_panic]
fn check_visibility_config() {
let conn = pg_connection();
let db_columns = HashSet::<Column>::from_iter(get_db_columns(&conn));
Expand Down
Loading