From 9977de4d2ebbd537e3ea394720c33e53dc1b113a Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Thu, 11 Jul 2024 20:18:56 +0200 Subject: [PATCH] downgrade ZIP dependency to a version that supports >65k files --- Cargo.lock | 70 ++++++++++++++++++++++++++++++------ Cargo.toml | 2 +- src/storage/archive_index.rs | 43 ++++++++++++++++------ 3 files changed, 94 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 40f828491..2618450bd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2211,7 +2211,7 @@ dependencies = [ "gix-utils", "itoa 1.0.11", "thiserror", - "winnow", + "winnow 0.6.13", ] [[package]] @@ -2293,7 +2293,7 @@ dependencies = [ "smallvec", "thiserror", "unicode-bom", - "winnow", + "winnow 0.6.13", ] [[package]] @@ -2558,7 +2558,7 @@ dependencies = [ "itoa 1.0.11", "smallvec", "thiserror", - "winnow", + "winnow 0.6.13", ] [[package]] @@ -2682,7 +2682,7 @@ dependencies = [ "gix-utils", "maybe-async", "thiserror", - "winnow", + "winnow 0.6.13", ] [[package]] @@ -2715,7 +2715,7 @@ dependencies = [ "gix-validate", "memmap2", "thiserror", - "winnow", + "winnow 0.6.13", ] [[package]] @@ -3996,6 +3996,27 @@ dependencies = [ "libc", ] +[[package]] +name = "num_enum" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02339744ee7253741199f897151b38e72257d13802d4ee837285cc2990a90845" +dependencies = [ + "num_enum_derive", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "681030a937600a36906c185595136d26abfebb4aa9c65701cefcaf8578bb982b" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.70", +] + [[package]] name = "num_threads" version = "0.1.7" @@ -4561,6 +4582,15 @@ dependencies = [ "yansi", ] +[[package]] +name = "proc-macro-crate" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284" +dependencies = [ + "toml_edit 0.21.1", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -6335,7 +6365,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "toml_edit", + "toml_edit 0.22.15", ] [[package]] @@ -6347,6 +6377,17 @@ dependencies = [ "serde", ] +[[package]] +name = "toml_edit" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" +dependencies = [ + "indexmap 2.2.6", + "toml_datetime", + "winnow 0.5.40", +] + [[package]] name = "toml_edit" version = "0.22.15" @@ -6357,7 +6398,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "winnow", + "winnow 0.6.13", ] [[package]] @@ -7096,6 +7137,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "winnow" +version = "0.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] + [[package]] name = "winnow" version = "0.6.13" @@ -7175,9 +7225,9 @@ checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" [[package]] name = "zip" -version = "2.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "775a2b471036342aa69bc5a602bc889cb0a06cda00477d0c69566757d5553d39" +checksum = "9cc23c04387f4da0374be4533ad1208cbb091d5c11d070dfef13676ad6497164" dependencies = [ "arbitrary", "bzip2", @@ -7185,7 +7235,7 @@ dependencies = [ "crossbeam-utils", "displaydoc", "indexmap 2.2.6", - "memchr", + "num_enum", "thiserror", ] diff --git a/Cargo.toml b/Cargo.toml index a6668887f..a9fbc1bab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -63,7 +63,7 @@ font-awesome-as-a-crate = { path = 
"crates/font-awesome-as-a-crate" } dashmap = "5.1.0" string_cache = "0.8.0" postgres-types = { version = "0.2", features = ["derive"] } -zip = {version = "2.1.3", default-features = false, features = ["bzip2"]} +zip = {version = "1.1.4", default-features = false, features = ["bzip2"]} bzip2 = "0.4.4" getrandom = "0.2.1" itertools = { version = "0.13.0", optional = true} diff --git a/src/storage/archive_index.rs b/src/storage/archive_index.rs index 183a322b1..3326716fc 100644 --- a/src/storage/archive_index.rs +++ b/src/storage/archive_index.rs @@ -119,31 +119,33 @@ mod tests { use std::io::Write; use zip::write::SimpleFileOptions; - fn create_test_archive() -> fs::File { + fn create_test_archive(file_count: u32) -> fs::File { let mut tf = tempfile::tempfile().unwrap(); let objectcontent: Vec<u8> = (0..255).collect(); let mut archive = zip::ZipWriter::new(tf); - archive - .start_file( - "testfile1", - SimpleFileOptions::default().compression_method(zip::CompressionMethod::Bzip2), - ) - .unwrap(); - archive.write_all(&objectcontent).unwrap(); + for i in 0..file_count { + archive + .start_file( + format!("testfile{i}"), + SimpleFileOptions::default().compression_method(zip::CompressionMethod::Bzip2), + ) + .unwrap(); + archive.write_all(&objectcontent).unwrap(); + } tf = archive.finish().unwrap(); tf } #[test] fn index_create_save_load_sqlite() { - let mut tf = create_test_archive(); + let mut tf = create_test_archive(1); let tempfile = tempfile::NamedTempFile::new().unwrap().into_temp_path(); create(&mut tf, &tempfile).unwrap(); - let fi = find_in_file(&tempfile, "testfile1").unwrap().unwrap(); + let fi = find_in_file(&tempfile, "testfile0").unwrap().unwrap(); assert_eq!(fi.range, FileRange::new(39, 459)); assert_eq!(fi.compression, CompressionAlgorithm::Bzip2); @@ -152,4 +154,25 @@ mod tests { .unwrap() .is_none()); } + + #[test] + fn archive_with_more_than_65k_files() { + let mut tf = create_test_archive(100_000); + + let tempfile = 
tempfile::NamedTempFile::new().unwrap().into_temp_path(); + create(&mut tf, &tempfile).unwrap(); + + let connection = Connection::open_with_flags( + tempfile, + OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_NO_MUTEX, + ) + .unwrap(); + let mut stmt = connection.prepare("SELECT count(*) FROM files").unwrap(); + + let count = stmt + .query_row([], |row| Ok(row.get::<_, usize>(0))) + .unwrap() + .unwrap(); + assert_eq!(count, 100_000); + } }