Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add temporary command to fix broken archive indexes #2552

Merged
merged 1 commit into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

This file was deleted.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

85 changes: 82 additions & 3 deletions src/bin/cratesfyi.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
use std::env;
use std::fmt::Write;
use std::net::SocketAddr;
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc;
use std::{env, fs};

use anyhow::{anyhow, Context as _, Error, Result};
use axum::async_trait;
use clap::{Parser, Subcommand, ValueEnum};
use docs_rs::cdn::CdnBackend;
use docs_rs::db::{self, add_path_into_database, Overrides, Pool, PoolClient};
use docs_rs::repositories::RepositoryStatsUpdater;
use docs_rs::storage::{rustdoc_archive_path, source_archive_path, PathNotFoundError};
use docs_rs::utils::{
get_config, get_crate_pattern_and_priority, list_crate_priorities, queue_builder,
remove_crate_priority, set_config, set_crate_priority, ConfigName,
remove_crate_priority, set_config, set_crate_priority, spawn_blocking, ConfigName,
};
use docs_rs::{
start_background_metrics_webserver, start_web_server, AsyncStorage, BuildQueue, Config,
Expand All @@ -23,6 +24,7 @@ use docs_rs::{
use futures_util::StreamExt;
use humantime::Duration;
use once_cell::sync::OnceCell;
use rusqlite::{Connection, OpenFlags};
use sentry::TransactionContext;
use tokio::runtime::{Builder, Runtime};
use tracing_log::LogTracer;
Expand Down Expand Up @@ -509,6 +511,9 @@ enum DatabaseSubcommand {
/// temporary command to update the `crates.latest_version_id` field
UpdateLatestVersionId,

/// temporary command to rebuild a subset of the archive indexes
FixBrokenArchiveIndexes,

/// Updates Github/Gitlab stats for crates.
UpdateRepositoryFields,

Expand Down Expand Up @@ -567,6 +572,80 @@ impl DatabaseSubcommand {
.context("Failed to run database migrations")?
}

// Temporary maintenance command.
//
// Walks every release (ordered by release id), downloads the archive index
// of its rustdoc archive and of its source archive, counts the rows in the
// index's `files` table, and queues a rebuild of the release when an index
// lists 65000 or more files. Archives that do not exist in storage are
// skipped; any other error aborts the whole command.
Self::FixBrokenArchiveIndexes => {
    let pool = ctx.pool()?;
    let build_queue = ctx.build_queue()?;
    ctx.runtime()?
        .block_on(async {
            let storage = ctx.async_storage().await?;
            let mut conn = pool.get_async().await?;
            let mut result_stream = sqlx::query!(
                "
SELECT c.name, r.version, r.release_time
FROM crates c, releases r
WHERE c.id = r.crate_id
ORDER BY r.id
"
            )
            .fetch(&mut *conn);

            while let Some(row) = result_stream.next().await {
                let row = row?;

                println!(
                    "checking index for {} {} ({:?})",
                    row.name, row.version, row.release_time
                );

                for path in &[
                    rustdoc_archive_path(&row.name, &row.version),
                    source_archive_path(&row.name, &row.version),
                ] {
                    // NOTE(review): 42 is passed as `latest_build_id`; presumably an
                    // arbitrary placeholder because the downloaded index is deleted
                    // again right below — confirm against `download_archive_index`.
                    let local_archive_index_filename =
                        match storage.download_archive_index(path, 42).await {
                            Ok(path) => path,
                            // A missing archive is not an error for this command.
                            Err(err) if err.downcast_ref::<PathNotFoundError>().is_some() => {
                                continue
                            }
                            Err(err) => return Err(err),
                        };

                    // Evaluate the count without `?` so that the local index file is
                    // removed even when opening or querying it fails. The connection
                    // is dropped at the end of the closure, before the file is
                    // deleted.
                    let count_result = Connection::open_with_flags(
                        &local_archive_index_filename,
                        OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_NO_MUTEX,
                    )
                    .and_then(|connection| {
                        connection.query_row("SELECT count(*) FROM files", [], |row| {
                            row.get::<_, usize>(0)
                        })
                    });

                    let removal = fs::remove_file(&local_archive_index_filename);

                    // Report the SQLite failure first; a failed cleanup only
                    // surfaces when the count itself succeeded.
                    let count = count_result?;
                    removal?;

                    // NOTE(review): the 65000 threshold presumably approximates the
                    // 65535-entry limit of non-zip64 archives — confirm.
                    if count >= 65000 {
                        println!("...big index, queueing rebuild");
                        // The clones move owned data into the blocking closure.
                        spawn_blocking({
                            let build_queue = build_queue.clone();
                            let name = row.name.clone();
                            let version = row.version.clone();
                            move || build_queue.add_crate(&name, &version, 5, None)
                        })
                        .await?;
                    }
                }
            }

            Ok::<(), anyhow::Error>(())
        })
        .context("Failed to queue rebuilds for big documentation sizes")?
}

Self::UpdateLatestVersionId => {
let pool = ctx.pool()?;
ctx.runtime()?
Expand All @@ -581,7 +660,7 @@ impl DatabaseSubcommand {
while let Some(row) = result_stream.next().await {
let row = row?;

println!("handling crate {}", row.name);
println!("handling crate {} ", row.name);

db::update_latest_version_id(&mut update_conn, row.id).await?;
}
Expand Down
8 changes: 4 additions & 4 deletions src/storage/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ type FileRange = RangeInclusive<u64>;

#[derive(Debug, thiserror::Error)]
#[error("path not found")]
pub(crate) struct PathNotFoundError;
pub struct PathNotFoundError;

#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) struct Blob {
Expand Down Expand Up @@ -304,7 +304,7 @@ impl AsyncStorage {
}

#[instrument]
pub(super) async fn download_archive_index(
pub async fn download_archive_index(
&self,
archive_path: &str,
latest_build_id: i32,
Expand Down Expand Up @@ -823,11 +823,11 @@ fn detect_mime(file_path: impl AsRef<Path>) -> &'static str {
}
}

pub(crate) fn rustdoc_archive_path(name: &str, version: &str) -> String {
/// Builds the storage path of the rustdoc archive for a single release.
///
/// The returned path has the shape `rustdoc/<name>/<version>.zip`; `name`
/// and `version` are inserted verbatim.
pub fn rustdoc_archive_path(name: &str, version: &str) -> String {
    std::fmt::format(format_args!("rustdoc/{name}/{version}.zip"))
}

pub(crate) fn source_archive_path(name: &str, version: &str) -> String {
/// Builds the storage path of the source archive for a single release.
///
/// The returned path has the shape `sources/<name>/<version>.zip`; `name`
/// and `version` are inserted verbatim.
pub fn source_archive_path(name: &str, version: &str) -> String {
    std::fmt::format(format_args!("sources/{name}/{version}.zip"))
}

Expand Down
2 changes: 1 addition & 1 deletion src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ where
/// })
/// .await?
/// ```
pub(crate) async fn spawn_blocking<F, R>(f: F) -> Result<R>
pub async fn spawn_blocking<F, R>(f: F) -> Result<R>
where
F: FnOnce() -> Result<R> + Send + 'static,
R: Send + 'static,
Expand Down
Loading