Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rsc: Add more dashboards and improve others #1619

Merged
merged 1 commit into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rust/rsc/.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"server_address": "0.0.0.0:3002",
"connection_pool_timeout": 60,
"standalone": false,
"active_store": "e9c2dac1-3882-442f-b8a4-1fc04582a003",
"active_store": "1f4a1ee7-d20f-4031-afe8-9dd002b4b0cf",
"log_directory": null,
"blob_eviction": {
"tick_rate": 60,
Expand Down
50 changes: 46 additions & 4 deletions rust/rsc/src/bin/rsc/dashboard.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::types::{
DashboardStatsLostOpportunityJob, DashboardStatsMostReusedJob, DashboardStatsOldestJob,
DashboardStatsResponse, DashboardStatsSizeRuntimeValueJob,
DashboardStatsBlobUseByStore, DashboardStatsLostOpportunityJob, DashboardStatsMostReusedJob,
DashboardStatsOldestJob, DashboardStatsResponse, DashboardStatsSizeRuntimeValueJob,
};
use axum::Json;
use rsc::database;
Expand All @@ -16,9 +16,11 @@ pub async fn stats(db: Arc<DatabaseConnection>) -> Json<DashboardStatsResponse>
savings: 0,
oldest_jobs: Vec::new(),
most_reused_jobs: Vec::new(),
most_time_saved_jobs: Vec::new(),
lost_opportunity_jobs: Vec::new(),
most_space_efficient_jobs: Vec::new(),
most_space_use_jobs: Vec::new(),
blob_use_by_store: Vec::new(),
};

let job_count = match database::count_jobs(db.as_ref()).await {
Expand Down Expand Up @@ -98,6 +100,24 @@ pub async fn stats(db: Arc<DatabaseConnection>) -> Json<DashboardStatsResponse>
}
};

let most_time_saved_jobs = match database::most_time_saved_jobs(db.as_ref()).await {
Ok(items) => {
let mut out = Vec::new();
for item in items {
out.push(DashboardStatsMostReusedJob {
label: item.label,
reuses: item.reuses,
savings: item.savings,
});
}
out
}
Err(err) => {
tracing::error! {%err, "Failed to lookup most time saved jobs"};
return Json(empty);
}
};

let lost_opportunity_jobs = match database::lost_opportuinty_jobs(db.as_ref()).await {
Ok(items) => {
let mut out = Vec::new();
Expand All @@ -107,6 +127,7 @@ pub async fn stats(db: Arc<DatabaseConnection>) -> Json<DashboardStatsResponse>
reuses: item.reuses,
misses: item.misses,
real_savings: item.real_savings,
lost_savings: item.lost_savings,
potential_savings: item.potential_savings,
});
}
Expand All @@ -126,7 +147,7 @@ pub async fn stats(db: Arc<DatabaseConnection>) -> Json<DashboardStatsResponse>
label: item.label,
runtime: item.runtime,
disk_usage: item.disk_usage,
ms_saved_per_byte: item.ms_saved_per_byte,
ns_saved_per_byte: item.ns_saved_per_byte,
});
}
out
Expand All @@ -145,7 +166,7 @@ pub async fn stats(db: Arc<DatabaseConnection>) -> Json<DashboardStatsResponse>
label: item.label,
runtime: item.runtime,
disk_usage: item.disk_usage,
ms_saved_per_byte: item.ms_saved_per_byte,
ns_saved_per_byte: item.ns_saved_per_byte,
});
}
out
Expand All @@ -156,15 +177,36 @@ pub async fn stats(db: Arc<DatabaseConnection>) -> Json<DashboardStatsResponse>
}
};

let blob_use_by_store = match database::blob_use_by_store(db.as_ref()).await {
Ok(items) => {
let mut out = Vec::new();
for item in items {
out.push(DashboardStatsBlobUseByStore {
store_id: item.store_id.to_string(),
store_type: item.store_type,
refs: item.refs,
blob_count: item.blob_count,
});
}
out
}
Err(err) => {
tracing::error! {%err, "Failed to lookup blob use by store"};
return Json(empty);
}
};

Json(DashboardStatsResponse {
job_count,
blob_count,
size,
savings,
oldest_jobs,
most_reused_jobs,
most_time_saved_jobs,
lost_opportunity_jobs,
most_space_efficient_jobs,
most_space_use_jobs,
blob_use_by_store,
})
}
13 changes: 12 additions & 1 deletion rust/rsc/src/bin/rsc/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ pub struct DashboardStatsLostOpportunityJob {
pub reuses: i32,
pub misses: i32,
pub real_savings: i64,
pub lost_savings: i64,
pub potential_savings: i64,
}

Expand All @@ -201,7 +202,15 @@ pub struct DashboardStatsSizeRuntimeValueJob {
pub label: String,
pub runtime: i64,
pub disk_usage: i64,
pub ms_saved_per_byte: i64,
pub ns_saved_per_byte: i64,
}

/// Per-blob-store usage entry returned in the dashboard stats response.
///
/// Populated from [`database::blob_use_by_store`] rows; see `dashboard.rs`.
#[derive(Debug, Serialize, Deserialize)]
pub struct DashboardStatsBlobUseByStore {
/// Store UUID, serialized to its string form (see `item.store_id.to_string()` in `dashboard.rs`).
pub store_id: String,
/// Store type name as recorded in the `blob_store.type` column.
pub store_type: String,
/// Number of references to this store's blobs (output files plus job stdout/stderr blobs).
pub refs: i64,
/// Total number of blobs currently recorded for this store.
pub blob_count: i64,
}

#[derive(Debug, Serialize, Deserialize)]
Expand All @@ -212,7 +221,9 @@ pub struct DashboardStatsResponse {
pub savings: i64,
pub oldest_jobs: Vec<DashboardStatsOldestJob>,
pub most_reused_jobs: Vec<DashboardStatsMostReusedJob>,
pub most_time_saved_jobs: Vec<DashboardStatsMostReusedJob>,
pub lost_opportunity_jobs: Vec<DashboardStatsLostOpportunityJob>,
pub most_space_efficient_jobs: Vec<DashboardStatsSizeRuntimeValueJob>,
pub most_space_use_jobs: Vec<DashboardStatsSizeRuntimeValueJob>,
pub blob_use_by_store: Vec<DashboardStatsBlobUseByStore>,
}
72 changes: 67 additions & 5 deletions rust/rsc/src/database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,50 @@ pub async fn read_test_blob_stores<T: ConnectionTrait>(
.await
}

/// Raw query result row for [`blob_use_by_store`]: blob usage aggregated per store.
///
/// Columns are mapped by name via `FromQueryResult`.
#[derive(Debug, FromQueryResult)]
pub struct BlobUseByStore {
/// Primary key of the blob store (`blob_store.id`).
pub store_id: Uuid,
/// Store implementation type (`blob_store.type`).
pub store_type: String,
/// Count of references to this store's blobs from `output_file.blob_id` and
/// each job's `stdout_blob_id`/`stderr_blob_id` (UNION ALL — multiply-referenced
/// blobs are counted once per reference).
pub refs: i64,
/// Total number of `blob` rows belonging to this store.
pub blob_count: i64,
}

/// Aggregate blob usage per blob store for the dashboard.
///
/// Runs a raw Postgres query built from two CTEs:
/// - `reference_count`: per store, counts references to its blobs coming from
///   `output_file.blob_id` and each job's `stdout_blob_id`/`stderr_blob_id`
///   (UNION ALL, so one blob referenced N times contributes N to `refs`).
/// - `blob_count`: per store, counts all `blob` rows plus carries the store's
///   id and type from `blob_store`.
///
/// NOTE(review): both CTEs and the final join are INNER JOINs, so a store with
/// no blobs — or whose blobs are never referenced — is silently omitted from
/// the result; confirm that is the intended dashboard behavior.
///
/// # Errors
/// Returns `DbErr` if the query cannot be executed.
pub async fn blob_use_by_store<T: ConnectionTrait>(db: &T) -> Result<Vec<BlobUseByStore>, DbErr> {
BlobUseByStore::find_by_statement(Statement::from_string(
DbBackend::Postgres,
r#"
WITH
reference_count AS (
SELECT b.store_id, count(b.store_id) refs
FROM blob b
INNER JOIN (
SELECT blob_id id FROM output_file
UNION ALL SELECT stdout_blob_id FROM job
UNION ALL SELECT stderr_blob_id FROM job
) rbi
on b.id = rbi.id
GROUP BY b.store_id
),
blob_count AS (
SELECT bs.id, bs.type, bbs.count as blob_count
FROM blob_store bs
INNER JOIN (
SELECT store_id, count(store_id)
FROM blob
GROUP BY store_id
) bbs
ON bbs.store_id = bs.id
)
SELECT b.id store_id, b.type store_type, r.refs, b.blob_count
FROM reference_count r
INNER JOIN blob_count b
ON r.store_id = b.id
"#,
))
.all(db)
.await
}

// ---------- Update ----------

// ---------- Delete ----------
Expand Down Expand Up @@ -296,12 +340,29 @@ pub async fn most_reused_jobs<T: ConnectionTrait>(db: &T) -> Result<Vec<MostReus
.await
}

/// Top 30 jobs ranked by estimated total time saved through reuse.
///
/// `savings` is computed in SQL as `round(hits * runtime)` cast to BIGINT,
/// joining `job_history` to `job` on the job hash. The result reuses the
/// [`MostReusedJob`] row type since the selected columns (`label`, `reuses`,
/// `savings`) match; only the ordering differs from `most_reused_jobs`.
///
/// NOTE(review): `h.hits` is presumably the cache-hit count for the hash —
/// confirm against the `job_history` schema.
///
/// # Errors
/// Returns `DbErr` if the query cannot be executed.
pub async fn most_time_saved_jobs<T: ConnectionTrait>(db: &T) -> Result<Vec<MostReusedJob>, DbErr> {
MostReusedJob::find_by_statement(Statement::from_string(
DbBackend::Postgres,
r#"
SELECT j.label, h.hits as reuses, CAST(round(h.hits * j.runtime) as BIGINT) as savings
FROM job_history h
INNER JOIN job j
ON j.hash = h.hash
ORDER BY savings DESC
LIMIT 30
"#,
))
.all(db)
.await
}

/// Raw query result row for the lost-opportunity dashboard query
/// (`lost_opportuinty_jobs` — note: identifier typo preserved from the original).
#[derive(Debug, FromQueryResult)]
pub struct LostOpportunityJobs {
/// Job label.
pub label: String,
/// Reuse count (`job_history.hits`).
pub reuses: i32,
/// Miss count reported as `h.misses - 1` in the query — presumably discounting
/// the unavoidable first execution; confirm against job_history semantics.
pub misses: i32,
/// Time saved by the hits: `round(hits * runtime)` as BIGINT.
pub real_savings: i64,
/// Time lost to the misses: `round((misses - 1) * runtime)` as BIGINT.
pub lost_savings: i64,
/// Savings if every run after the first had hit: `round((hits + misses - 1) * runtime)`.
pub potential_savings: i64,
}

Expand All @@ -316,11 +377,12 @@ pub async fn lost_opportuinty_jobs<T: ConnectionTrait>(
h.hits as reuses,
h.misses - 1 as misses,
CAST(round(h.hits * j.runtime) as BIGINT) as real_savings,
CAST(round((h.misses - 1) * j.runtime) as BIGINT) as lost_savings,
CAST(round((h.hits + h.misses - 1) * j.runtime) as BIGINT) as potential_savings
FROM job_history h
INNER JOIN job j
ON j.hash = h.hash
ORDER BY potential_savings DESC
ORDER BY lost_savings DESC
LIMIT 30;
"#,
))
Expand All @@ -333,7 +395,7 @@ pub struct SizeRuntimeValueJob {
pub label: String,
pub runtime: i64,
pub disk_usage: i64,
pub ms_saved_per_byte: i64,
pub ns_saved_per_byte: i64,
}

pub async fn most_space_efficient_jobs<T: ConnectionTrait>(
Expand All @@ -346,10 +408,10 @@ pub async fn most_space_efficient_jobs<T: ConnectionTrait>(
j.label,
CAST(round(j.runtime) as BIGINT) as runtime,
j.size as disk_usage,
CAST(round(j.runtime / (j.size) * 1000) as BIGINT) as ms_saved_per_byte
CAST(round(j.runtime / (j.size) * 1000000000) as BIGINT) as ns_saved_per_byte
FROM job j
WHERE size IS NOT NULL
ORDER BY ms_saved_per_byte DESC
ORDER BY ns_saved_per_byte DESC
LIMIT 30;
"#,
))
Expand All @@ -367,7 +429,7 @@ pub async fn most_space_use_jobs<T: ConnectionTrait>(
j.label,
CAST(round(j.runtime) as BIGINT) as runtime,
j.size as disk_usage,
CAST(round(j.runtime / (j.size) * 1000) as BIGINT) as ms_saved_per_byte
CAST(round(j.runtime / (j.size) * 1000000000) as BIGINT) as ns_saved_per_byte
FROM job j
WHERE size IS NOT NULL
ORDER BY disk_usage DESC
Expand Down
Loading