Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split store #9

Merged
merged 10 commits into from
Jan 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions common/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "walker-common"
description = "A common functionality for SBOM and CSAF walker"
version = "0.5.4"
version = "0.6.0"
edition = "2021"
authors = ["Jens Reimann <ctron@dentrassi.de>"]
license = "Apache-2.0"
Expand All @@ -21,16 +21,18 @@ clap = { version = "4.3", features = ["derive", "env"] }
csv = "1"
digest = "0.10.7"
flexible-time = "0.1"
futures-util = "0.3"
humantime = "2"
indicatif = "0.17.6"
log = "0.4"
openid = "0.12"
reqwest = { version = "0.11", features = ["json"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
sha2 = "0.10.7"
thiserror = "1"
time = { version = "0.3", features = ["serde"] }
tokio = { version = "1", features = ["sync"] }
tokio = { version = "1", features = ["sync", "fs"] }
tracing = "0.1"
url = "2"

Expand All @@ -41,6 +43,7 @@ bzip2-rs = { version = "0.1", optional = true, features = ["rustc_1_51"] }

[features]
default = ["bzip2"]
_crypto-nettle = ["sequoia-openpgp/crypto-nettle"]

[package.metadata.cargo-all-features]
always_include_features = [
Expand Down
39 changes: 14 additions & 25 deletions common/src/changes.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
use crate::fetcher::{self, Fetcher};
use std::collections::HashMap;
use std::time::SystemTime;
use time::OffsetDateTime;
use url::{ParseError, Url};

Expand All @@ -15,42 +13,33 @@ pub enum Error {
}

#[derive(Clone, Debug, PartialEq, Eq, serde::Deserialize)]
struct ChangeEntry {
file: String,
pub struct ChangeEntry {
pub file: String,
#[serde(with = "time::serde::iso8601")]
timestamp: OffsetDateTime,
pub timestamp: OffsetDateTime,
}

pub struct ChangeSource {
map: HashMap<String, SystemTime>,
pub entries: Vec<ChangeEntry>,
}

impl ChangeSource {
pub fn modified(&self, file: &str) -> Option<SystemTime> {
self.map.get(file).copied()
}

pub async fn retrieve(fetcher: &Fetcher, base_url: &Url) -> Result<Self, Error> {
let changes = fetcher
.fetch::<Option<String>>(base_url.join("changes.csv")?)
.fetch::<String>(base_url.join("changes.csv")?)
.await?;

log::info!("Found 'changes.csv', loading data");
log::info!("Found 'changes.csv', processing data");

let map = if let Some(changes) = changes {
let reader = csv::ReaderBuilder::new()
.delimiter(b',')
.has_headers(false)
.from_reader(changes.as_bytes());
let reader = csv::ReaderBuilder::new()
.delimiter(b',')
.has_headers(false)
.from_reader(changes.as_bytes());

reader
.into_deserialize::<ChangeEntry>()
.map(|entry| entry.map(|entry| (entry.file, entry.timestamp.into())))
.collect::<Result<HashMap<_, _>, _>>()?
} else {
HashMap::new()
};
let entries = reader
.into_deserialize::<ChangeEntry>()
.collect::<Result<Vec<_>, _>>()?;

Ok(Self { map })
Ok(Self { entries })
}
}
1 change: 1 addition & 0 deletions common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ pub mod progress;
pub mod retrieve;
pub mod sender;
pub mod since;
pub mod source;
pub mod utils;
pub mod validate;
69 changes: 69 additions & 0 deletions common/src/source/file.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
use crate::retrieve::RetrievedDigest;
use anyhow::anyhow;
use bytes::Bytes;
use digest::Digest;
use futures_util::try_join;
use sha2::{Sha256, Sha512};
use std::io::ErrorKind;
use std::path::{Path, PathBuf};
use url::Url;

/// Read a file into a string, treating a missing file as `None`.
///
/// Any error other than [`ErrorKind::NotFound`] is propagated to the caller.
pub async fn read_optional(path: impl AsRef<Path>) -> Result<Option<String>, anyhow::Error> {
    let result = tokio::fs::read_to_string(path).await;
    match result {
        Ok(content) => Ok(Some(content)),
        Err(err) => {
            if err.kind() == ErrorKind::NotFound {
                // a missing side-car file is not an error
                Ok(None)
            } else {
                Err(err.into())
            }
        }
    }
}

/// Convert a `file:` URL into a local filesystem path.
///
/// Fails with a descriptive error when the URL does not denote a local path.
pub fn to_path(url: &Url) -> Result<PathBuf, anyhow::Error> {
    match url.to_file_path() {
        Ok(path) => Ok(path),
        // `to_file_path` reports failure with a unit error, so attach the URL for context
        Err(()) => Err(anyhow!("Failed to convert URL to path: {url}")),
    }
}

/// Read the signature file and digests
///
/// The expected locations are:
/// * `{base}.asc`
/// * `{base}.sha256`
/// * `{base}.sha512`
pub async fn read_sig_and_digests(
path: &Path,
data: &Bytes,
) -> anyhow::Result<(
Option<String>,
Option<RetrievedDigest<Sha256>>,
Option<RetrievedDigest<Sha512>>,
)> {
let (signature, sha256, sha512) = try_join!(
read_optional(format!("{}.asc", path.display())),
read_optional(format!("{}.sha256", path.display())),
read_optional(format!("{}.sha512", path.display())),
)?;

let sha256 = sha256
// take the first "word" from the line
.and_then(|expected| expected.split(' ').next().map(ToString::to_string))
.map(|expected| {
let mut actual = Sha256::new();
actual.update(data);
RetrievedDigest::<Sha256> {
expected,
actual: actual.finalize(),
}
});

let sha512 = sha512
// take the first "word" from the line
.and_then(|expected| expected.split(' ').next().map(ToString::to_string))
.map(|expected| {
let mut actual = Sha512::new();
actual.update(data);
RetrievedDigest::<Sha512> {
expected,
actual: actual.finalize(),
}
});

Ok((signature, sha256, sha512))
}
1 change: 1 addition & 0 deletions common/src/source/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod file;
27 changes: 27 additions & 0 deletions common/src/utils/url.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,35 @@
use url::Url;

/// Get a URL from something
///
/// ## Relative URLs
///
/// An entity can provide a relative URL. This is an optional operation, and is not implemented by default.
///
/// Implementors of this feature should have a clear definition what the meaning of the base is. For example:
/// the advisory's base is the distribution URL.
///
/// The combination of the provided actual base and relative URL must result in the same value as the actual URL.
pub trait Urlify {
    /// The absolute URL of the entity.
    fn url(&self) -> &Url;

    /// The base plus relative URL, when the implementor supports relative URLs.
    fn relative_base_and_url(&self) -> Option<(&Url, String)> {
        None
    }

    /// Only the relative part, when available.
    fn relative_url(&self) -> Option<String> {
        self.relative_base_and_url().map(|(_, relative)| relative)
    }

    /// Only the base, when available.
    fn relative_base(&self) -> Option<&Url> {
        self.relative_base_and_url().map(|(base, _)| base)
    }

    /// The relative URL when present, otherwise the full URL rendered as a string.
    fn possibly_relative_url(&self) -> String {
        match self.relative_url() {
            Some(relative) => relative,
            None => self.url().to_string(),
        }
    }
}

impl<T, E> Urlify for Result<T, E>
Expand Down
6 changes: 4 additions & 2 deletions csaf/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "csaf-walker"
description = "A library to work with CSAF data"
version = "0.5.4"
version = "0.6.0"
edition = "2021"
authors = ["Jens Reimann <ctron@dentrassi.de>"]
license = "Apache-2.0"
Expand All @@ -24,6 +24,7 @@ filetime = "0.2"
futures = "0.3"
humantime = "2"
log = "0.4.17"
percent-encoding = "2.3"
reqwest = { version = "0.11", features = ["json"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
Expand All @@ -32,8 +33,9 @@ thiserror = "1"
time = { version = "0.3.20", features = ["formatting", "parsing", "serde"] }
tokio = { version = "1", features = ["macros", "fs"] }
url = { version = "2.3.1", features = ["serde"] }
walkdir = "2.4"

walker-common = { version = "0.5", path = "../common" }
walker-common = { version = "0.6", path = "../common" }

sequoia-openpgp = { version = "1", default-features = false }

Expand Down
6 changes: 3 additions & 3 deletions csaf/csaf-cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "csaf-cli"
description = "A CLI to work with CSAF data"
version = "0.5.4"
version = "0.6.0"
edition = "2021"
authors = ["Jens Reimann <ctron@dentrassi.de>"]
license = "Apache-2.0"
Expand Down Expand Up @@ -32,8 +32,8 @@ thiserror = "1"
time = "0.3.20"
tokio = { version = "1", features = ["full"] }

walker-common = { version = "0.5.4", path = "../../common" }
csaf-walker = { version = "0.5.4", path = "..", default-features = false, features = ["csaf"] }
walker-common = { version = "0.6.0", path = "../../common" }
csaf-walker = { version = "0.6.0", path = "..", default-features = false, features = ["csaf"] }

# just there for the feature
openssl = { version = "0.10", optional = true }
Expand Down
19 changes: 14 additions & 5 deletions csaf/csaf-cli/src/cmd/discover.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use crate::{cmd::DiscoverArguments, common::new_source};
use crate::{
cmd::{DiscoverArguments, FilterArguments},
common::{filter, new_source},
};
use csaf_walker::{discover::DiscoveredAdvisory, walker::Walker};
use std::convert::Infallible;
use walker_common::{cli::client::ClientArguments, progress::Progress};
Expand All @@ -11,17 +14,23 @@ pub struct Discover {

#[command(flatten)]
discover: DiscoverArguments,

#[command(flatten)]
filter: FilterArguments,
}

impl Discover {
pub async fn run(self, progress: Progress) -> anyhow::Result<()> {
Walker::new(new_source(self.discover, self.client).await?)
.with_progress(progress)
.walk(|discovered: DiscoveredAdvisory| async move {
println!("{}", discovered.url);
.walk(filter(
self.filter,
|discovered: DiscoveredAdvisory| async move {
println!("{}", discovered.url);

Ok::<_, Infallible>(())
})
Ok::<_, Infallible>(())
},
))
.await?;

Ok(())
Expand Down
6 changes: 5 additions & 1 deletion csaf/csaf-cli/src/cmd/download.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::{
cmd::{DiscoverArguments, SkipArguments, StoreArguments},
cmd::{DiscoverArguments, FilterArguments, SkipArguments, StoreArguments},
common::{walk_visitor, DiscoverConfig},
};
use csaf_walker::{
Expand All @@ -21,6 +21,9 @@ pub struct Download {
#[command(flatten)]
discover: DiscoverArguments,

#[command(flatten)]
filter: FilterArguments,

#[command(flatten)]
runner: RunnerArguments,

Expand Down Expand Up @@ -52,6 +55,7 @@ impl Download {
progress,
self.client,
DiscoverConfig::from(self.discover).with_since(since.since),
self.filter,
self.runner,
move |source| async move {
let base = base.clone();
Expand Down
35 changes: 30 additions & 5 deletions csaf/csaf-cli/src/cmd/mod.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
use anyhow::Context;
use csaf_walker::visitors::send::SendVisitor;
use csaf_walker::visitors::store::StoreVisitor;
use csaf_walker::visitors::{filter::FilterConfig, send::SendVisitor, store::StoreVisitor};
use flexible_time::timestamp::StartTimestamp;
use reqwest::Url;
use std::collections::HashSet;
use std::path::PathBuf;
use walker_common::sender;
use walker_common::sender::provider::OpenIdTokenProviderConfigArguments;
use walker_common::sender::HttpSender;
use walker_common::sender::{self, provider::OpenIdTokenProviderConfigArguments, HttpSender};

pub mod discover;
pub mod download;
Expand All @@ -21,11 +19,38 @@ pub mod sync;
pub struct DiscoverArguments {
/// Source to scan from, will be suffixed with "/.well-known/csaf/provider-metadata.json" unless "--full" is used.
pub source: String,

#[arg(long)]
/// Treat the "source" as a full URL to the metadata.
pub full: bool,
}

/// CLI arguments controlling which discovered documents are processed.
#[derive(Debug, clap::Parser)]
#[command(next_help_heading = "Filters")]
pub struct FilterArguments {
    /// Distributions to ignore
    #[arg(long)]
    pub ignore_distribution: Vec<String>,

    /// Prefix to ignore
    #[arg(long)]
    pub ignore_prefix: Vec<String>,

    /// Ignore all non-matching prefixes
    #[arg(long)]
    pub only_prefix: Vec<String>,
}

impl From<FilterArguments> for FilterConfig {
fn from(filter: FilterArguments) -> Self {
Self {
ignored_distributions: HashSet::from_iter(filter.ignore_distribution),
ignored_prefixes: filter.ignore_prefix,
only_prefixes: filter.only_prefix,
}
}
}

#[derive(Debug, clap::Parser)]
#[command(next_help_heading = "Storage")]
pub struct StoreArguments {
Expand Down
Loading