From af316b3caf6f395cc7943c4ea45c035600556db7 Mon Sep 17 00:00:00 2001
From: Ashley Coleman
Date: Thu, 1 Aug 2024 17:20:25 -0600
Subject: [PATCH] rsc: Tier blobs into nested dirs to avoid file limits (#1617)

* rsc: Tier blobs into nested dirs to avoid file limits

* tune parameters

* Update rust/rsc/src/bin/rsc/blob_store_impls.rs

Co-authored-by: Colin Schmidt

---------

Co-authored-by: Colin Schmidt
---
 rust/rsc/.config.json                    |  2 +-
 rust/rsc/src/bin/rsc/blob_store_impls.rs | 50 ++++++++++++++++++------
 2 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/rust/rsc/.config.json b/rust/rsc/.config.json
index 88b715c76..533f8cc6b 100644
--- a/rust/rsc/.config.json
+++ b/rust/rsc/.config.json
@@ -3,7 +3,7 @@
   "server_address": "0.0.0.0:3002",
   "connection_pool_timeout": 60,
   "standalone": false,
-  "active_store": "6a6ea9c9-a261-44b1-8ef7-305a12b04eab",
+  "active_store": "e9c2dac1-3882-442f-b8a4-1fc04582a003",
   "log_directory": null,
   "blob_eviction": {
     "tick_rate": 60,
diff --git a/rust/rsc/src/bin/rsc/blob_store_impls.rs b/rust/rsc/src/bin/rsc/blob_store_impls.rs
index 648145320..bafcf2be2 100644
--- a/rust/rsc/src/bin/rsc/blob_store_impls.rs
+++ b/rust/rsc/src/bin/rsc/blob_store_impls.rs
@@ -1,20 +1,37 @@
 use crate::blob::*;
 use async_trait::async_trait;
-use data_encoding::BASE64URL;
 use futures::stream::BoxStream;
 use rand_core::{OsRng, RngCore};
 use sea_orm::prelude::Uuid;
+use std::fmt::Write;
 use tokio::fs::File;
 use tokio::io::AsyncReadExt;
 use tokio::io::BufWriter;
 use tokio_util::bytes::Bytes;
 use tokio_util::io::StreamReader;
 
-fn create_temp_filename() -> String {
-    let mut key = [0u8; 16];
-    OsRng.fill_bytes(&mut key);
-    // URL must be used as files can't contain /
-    BASE64URL.encode(&key)
+fn create_random_blob_path() -> std::path::PathBuf {
+    // 2 deep @ 8 bytes wide
+    let mut parts = [0u8; 10];
+    OsRng.fill_bytes(&mut parts);
+
+    let mut buf = std::path::PathBuf::from("");
+
+    // First 2 bytes represent the containing directories
+    for i in 0..2 {
+        let mut s = String::new();
+        write!(&mut s, "{:02X}", parts[i]).unwrap();
+        buf.push(s);
+    }
+
+    // Next 8 bytes represent the file name
+    let mut s = String::new();
+    for i in 2..10 {
+        write!(&mut s, "{:02X}", parts[i]).unwrap();
+    }
+    buf.push(s);
+
+    return buf;
 }
 
 #[derive(Debug, Clone)]
@@ -36,20 +53,29 @@ impl BlobStore for LocalBlobStore {
         let reader = StreamReader::new(stream);
         futures::pin_mut!(reader);
 
-        let key = create_temp_filename();
-        let path = std::path::Path::new(&self.root).join(key.clone());
-        let mut file = BufWriter::new(File::create(path).await?);
+        let rel_path = create_random_blob_path();
+        let path = std::path::Path::new(&self.root).join(rel_path.clone());
+        tokio::fs::create_dir_all(path.parent().unwrap()).await?;
 
+        let mut file = BufWriter::new(File::create(path).await?);
         let written = tokio::io::copy(&mut reader, &mut file).await?;
 
         let size = match i64::try_from(written) {
-            Err(_) => {
-                tracing::error!(%written, "Size overflows i64, setting to i64::MAX instead");
+            Err(err) => {
+                tracing::error!(%err, %written, "Size overflows i64, setting to i64::MAX instead");
                 i64::MAX
             }
             Ok(size) => size,
         };
 
+        let key = match rel_path.into_os_string().into_string() {
+            Err(path) => {
+                tracing::error!("Cannot convert path to string, returning lossy path instead");
+                path.to_string_lossy().to_string()
+            }
+            Ok(s) => s,
+        };
+
         Ok((key, size))
     }
 
@@ -80,7 +106,7 @@ impl BlobStore for TestBlobStore {
         &self,
         _stream: BoxStream<'a, Result<Bytes, std::io::Error>>,
     ) -> Result<(String, i64), std::io::Error> {
-        Ok((create_temp_filename(), 0xDEADBEEF))
+        Ok(("TestTestTest".to_string(), 0xDEADBEEF))
     }
 
     async fn download_url(&self, key: String) -> String {