From 69b74a39b997a76bd2d6d471b03340bde95d411f Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 24 Apr 2025 16:12:23 -0400 Subject: [PATCH 1/8] Introduce sled-agent-config-reconciler skeleton --- Cargo.lock | 33 ++ Cargo.toml | 3 + common/Cargo.toml | 1 + common/src/disk.rs | 9 + sled-agent/Cargo.toml | 1 + sled-agent/config-reconciler/Cargo.toml | 40 ++ .../src/dataset_serialization_task.rs | 162 +++++++ .../{ => config-reconciler}/src/dump_setup.rs | 72 +-- .../config-reconciler/src/external_disks.rs | 3 + sled-agent/config-reconciler/src/handle.rs | 417 ++++++++++++++++++ .../config-reconciler/src/internal_disks.rs | 309 +++++++++++++ sled-agent/config-reconciler/src/ledger.rs | 172 ++++++++ sled-agent/config-reconciler/src/lib.rs | 80 ++++ sled-agent/config-reconciler/src/raw_disks.rs | 80 ++++ .../config-reconciler/src/reconciler_task.rs | 246 +++++++++++ .../src/sled_agent_facilities.rs | 69 +++ sled-agent/src/lib.rs | 1 - sled-agent/src/storage_monitor.rs | 5 +- sled-agent/src/support_bundle/storage.rs | 17 +- sled-agent/types/src/lib.rs | 1 + sled-agent/types/src/support_bundle.rs | 20 + 21 files changed, 1690 insertions(+), 51 deletions(-) create mode 100644 sled-agent/config-reconciler/Cargo.toml create mode 100644 sled-agent/config-reconciler/src/dataset_serialization_task.rs rename sled-agent/{ => config-reconciler}/src/dump_setup.rs (97%) create mode 100644 sled-agent/config-reconciler/src/external_disks.rs create mode 100644 sled-agent/config-reconciler/src/handle.rs create mode 100644 sled-agent/config-reconciler/src/internal_disks.rs create mode 100644 sled-agent/config-reconciler/src/ledger.rs create mode 100644 sled-agent/config-reconciler/src/lib.rs create mode 100644 sled-agent/config-reconciler/src/raw_disks.rs create mode 100644 sled-agent/config-reconciler/src/reconciler_task.rs create mode 100644 sled-agent/config-reconciler/src/sled_agent_facilities.rs create mode 100644 sled-agent/types/src/support_bundle.rs diff --git a/Cargo.lock b/Cargo.lock index a14b71ecddc..6fc4d9910c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7093,6 +7093,7 @@ dependencies = [ "futures", "hex", "http", + "id-map", "ipnetwork", "libc", "macaddr", @@ -7772,6 +7773,7 @@ dependencies = [ "sha3", "sled-agent-api", "sled-agent-client", + "sled-agent-config-reconciler", "sled-agent-types", "sled-diagnostics", "sled-hardware", @@ -11473,6 +11475,37 @@ dependencies = [ "uuid", ] +[[package]] +name = "sled-agent-config-reconciler" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "camino", + "camino-tempfile", + "chrono", + "derive_more", + "dropshot 0.16.0", + "glob", + "id-map", + "illumos-utils", + "key-manager", + "nexus-sled-agent-shared", + "omicron-common", + "omicron-test-utils", + "omicron-uuid-kinds", + "sled-agent-api", + "sled-agent-types", + "sled-hardware", + "sled-storage", + "slog", + "slog-error-chain", + "thiserror 1.0.69", + "tokio", + "tufaceous-artifact", + "zone 0.3.1", +] + [[package]] name = "sled-agent-types" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 5e4db73fbda..2c8fb98d10f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -124,6 +124,7 @@ members = [ "sled-agent", "sled-agent/api", "sled-agent/bootstrap-agent-api", + "sled-agent/config-reconciler", "sled-agent/repo-depot-api", "sled-agent/types", "sled-diagnostics", @@ -275,6 +276,7 @@ default-members = [ "sled-agent", "sled-agent/api", "sled-agent/bootstrap-agent-api", + "sled-agent/config-reconciler", "sled-agent/repo-depot-api", "sled-agent/types", "sled-diagnostics", @@ -662,6 
+664,7 @@ similar-asserts = "1.7.0" sled = "=0.34.7" sled-agent-api = { path = "sled-agent/api" } sled-agent-client = { path = "clients/sled-agent-client" } +sled-agent-config-reconciler = { path = "sled-agent/config-reconciler" } sled-agent-types = { path = "sled-agent/types" } sled-diagnostics = { path = "sled-diagnostics" } sled-hardware = { path = "sled-hardware" } diff --git a/common/Cargo.toml b/common/Cargo.toml index da1b648c333..2e1cb217743 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -25,6 +25,7 @@ dropshot.workspace = true futures.workspace = true hex.workspace = true http.workspace = true +id-map.workspace = true ipnetwork.workspace = true lldp_protocol.workspace = true macaddr.workspace = true diff --git a/common/src/disk.rs b/common/src/disk.rs index adef569d5ab..dad0ec7a2fc 100644 --- a/common/src/disk.rs +++ b/common/src/disk.rs @@ -7,6 +7,7 @@ use anyhow::bail; use camino::{Utf8Path, Utf8PathBuf}; use daft::Diffable; +use id_map::IdMappable; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::ZpoolUuid; @@ -337,6 +338,14 @@ pub struct DatasetConfig { pub inner: SharedDatasetConfig, } +impl IdMappable for DatasetConfig { + type Id = DatasetUuid; + + fn id(&self) -> Self::Id { + self.id + } +} + #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, )] diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 0c3bd1a42cf..0f1af733b25 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -83,6 +83,7 @@ sha2.workspace = true sha3.workspace = true sled-agent-api.workspace = true sled-agent-client.workspace = true +sled-agent-config-reconciler.workspace = true sled-agent-types.workspace = true sled-diagnostics.workspace = true sled-hardware.workspace = true diff --git a/sled-agent/config-reconciler/Cargo.toml b/sled-agent/config-reconciler/Cargo.toml new file mode 100644 index 00000000000..c9079ffff2e --- /dev/null +++ b/sled-agent/config-reconciler/Cargo.toml @@ -0,0 +1,40 @@ +[package] +name = "sled-agent-config-reconciler" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +async-trait.workspace = true +camino.workspace = true +camino-tempfile.workspace = true +chrono.workspace = true +derive_more.workspace = true +dropshot.workspace = true +glob.workspace = true +id-map.workspace = true +illumos-utils.workspace = true +key-manager.workspace = true +nexus-sled-agent-shared.workspace = true +omicron-common.workspace = true +omicron-uuid-kinds.workspace = true +sled-agent-api.workspace = true +sled-agent-types.workspace = true +sled-hardware.workspace = true +sled-storage.workspace = true +slog.workspace = true +slog-error-chain.workspace = true +thiserror.workspace = true +tokio.workspace = true +tufaceous-artifact.workspace = true +zone.workspace = true + +[dev-dependencies] +omicron-test-utils.workspace = true + +[features] +testing = [] diff --git a/sled-agent/config-reconciler/src/dataset_serialization_task.rs b/sled-agent/config-reconciler/src/dataset_serialization_task.rs new file mode 100644 index 00000000000..d3548469ff7 --- /dev/null +++ b/sled-agent/config-reconciler/src/dataset_serialization_task.rs @@ -0,0 +1,162 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Many of the ZFS operations sled-agent performs are not atomic, because they
+//! involve multiple lower-level ZFS operations. This module implements a tokio
+//! task that serializes a set of operations to ensure no two operations could
+//! be executing concurrently.
+//!
+//! It uses the common pattern of "a task with an mpsc channel to send requests,
+//! using oneshot channels to send responses".
+
+use camino::Utf8PathBuf;
+use id_map::IdMap;
+use id_map::IdMappable;
+use nexus_sled_agent_shared::inventory::InventoryDataset;
+use omicron_common::disk::DatasetConfig;
+use omicron_common::zpool_name::ZpoolName;
+use omicron_uuid_kinds::DatasetUuid;
+use sled_storage::config::MountConfig;
+use sled_storage::manager::NestedDatasetConfig;
+use sled_storage::manager::NestedDatasetListOptions;
+use sled_storage::manager::NestedDatasetLocation;
+use slog::Logger;
+use slog::warn;
+use std::collections::BTreeSet;
+use std::sync::Arc;
+use tokio::sync::mpsc;
+
+#[derive(Debug, thiserror::Error)]
+pub enum DatasetTaskError {
+    #[error("cannot perform dataset operations: waiting for key manager")]
+    WaitingForKeyManager,
+    #[error("dataset task busy; cannot service new requests")]
+    Busy,
+    #[error("internal error: dataset task exited!")]
+    Exited,
+}
+
+#[derive(Debug)]
+pub(crate) struct DatasetEnsureResult(IdMap<SingleDatasetEnsureResult>);
+
+#[derive(Debug, Clone)]
+struct SingleDatasetEnsureResult {
+    config: DatasetConfig,
+    state: DatasetState,
+}
+
+impl IdMappable for SingleDatasetEnsureResult {
+    type Id = DatasetUuid;
+
+    fn id(&self) -> Self::Id {
+        self.config.id
+    }
+}
+
+#[derive(Debug, Clone)]
+enum DatasetState {
+    Mounted,
+    FailedToMount, // TODO add error
+    UuidMismatch(DatasetUuid),
+    ZpoolNotFound,
+    ParentMissingFromConfig,
+    ParentFailedToMount,
+}
+
+#[derive(Debug)]
+pub(crate) struct DatasetTaskHandle(mpsc::Sender<DatasetTaskRequest>);
+
+impl DatasetTaskHandle {
+    pub fn spawn_dataset_task(
+        mount_config: Arc<MountConfig>,
+        base_log: &Logger,
+    ) -> Self {
+        // We don't expect too many concurrent requests to this task, and want
+        // to detect "the task is wedged" pretty quickly. Common operations:
+        //
+        // 1. Reconciler wants to ensure datasets (at most 1 at a time)
+        // 2. Inventory requests from Nexus (likely at most 3 at a time)
+        // 3. Support bundle operations (unlikely to be multiple concurrently)
+        //
+        // so we'll pick a number that allows all of those plus a little
+        // overhead.
+ let (request_tx, request_rx) = mpsc::channel(16); + + tokio::spawn( + DatasetTask { + mount_config, + request_rx, + log: base_log.new(slog::o!("component" => "DatasetTask")), + } + .run(), + ); + + Self(request_tx) + } + + pub async fn datasets_ensure( + &self, + _dataset_configs: IdMap, + _zpools: BTreeSet, + ) -> Result { + unimplemented!() + } + + pub async fn inventory( + &self, + _zpools: BTreeSet, + ) -> Result, DatasetTaskError> { + unimplemented!() + } + + pub async fn nested_dataset_ensure_mounted( + &self, + _dataset: NestedDatasetLocation, + ) -> Result { + unimplemented!() + } + + pub async fn nested_dataset_ensure( + &self, + _config: NestedDatasetConfig, + ) -> Result<(), DatasetTaskError> { + unimplemented!() + } + + pub async fn nested_dataset_destroy( + &self, + _name: NestedDatasetLocation, + ) -> Result<(), DatasetTaskError> { + unimplemented!() + } + + pub async fn nested_dataset_list( + &self, + _name: NestedDatasetLocation, + _options: NestedDatasetListOptions, + ) -> Result, DatasetTaskError> { + unimplemented!() + } +} + +struct DatasetTask { + mount_config: Arc, + request_rx: mpsc::Receiver, + log: Logger, +} + +impl DatasetTask { + async fn run(mut self) { + while let Some(req) = self.request_rx.recv().await { + self.handle_request(req).await; + } + warn!(self.log, "all request handles closed; exiting dataset task"); + } + + async fn handle_request(&mut self, _req: DatasetTaskRequest) { + unimplemented!() + } +} + +enum DatasetTaskRequest {} diff --git a/sled-agent/src/dump_setup.rs b/sled-agent/config-reconciler/src/dump_setup.rs similarity index 97% rename from sled-agent/src/dump_setup.rs rename to sled-agent/config-reconciler/src/dump_setup.rs index b8846e117dd..6ab2ffd1082 100644 --- a/sled-agent/src/dump_setup.rs +++ b/sled-agent/config-reconciler/src/dump_setup.rs @@ -1,3 +1,7 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + //! This module is responsible for moving debug info (kernel crash dumps, //! userspace process core dumps, and rotated logs) onto external drives for //! perusal/archival, and to prevent internal drives from filling up. 
@@ -92,13 +96,22 @@ use illumos_utils::dumpadm::{DumpAdm, DumpContentType}; use illumos_utils::zone::ZONE_PREFIX; use illumos_utils::zpool::{ZpoolHealth, ZpoolName}; use omicron_common::disk::DiskVariant; +use sled_agent_types::support_bundle::BUNDLE_FILE_NAME; +use sled_agent_types::support_bundle::BUNDLE_TMP_FILE_NAME_SUFFIX; use sled_storage::config::MountConfig; use sled_storage::dataset::{CRASH_DATASET, DUMP_DATASET}; use sled_storage::disk::Disk; use slog::Logger; +use slog::debug; +use slog::error; +use slog::info; +use slog::o; +use slog::trace; +use slog::warn; use std::collections::HashSet; use std::ffi::OsString; use std::path::{Path, PathBuf}; +use std::sync::Arc; use std::time::{Duration, SystemTime, SystemTimeError, UNIX_EPOCH}; use tokio::sync::mpsc::Receiver; use tokio::sync::oneshot; @@ -122,13 +135,13 @@ struct CoreDataset(Utf8PathBuf); #[derive(AsRef, Clone, Debug, From)] pub(super) struct CoreZpool { - mount_config: MountConfig, + mount_config: Arc, name: ZpoolName, } #[derive(AsRef, Clone, Debug, From)] pub(super) struct DebugZpool { - mount_config: MountConfig, + mount_config: Arc, name: ZpoolName, } @@ -204,13 +217,13 @@ struct DumpSetupWorker { pub struct DumpSetup { tx: tokio::sync::mpsc::Sender, - mount_config: MountConfig, + mount_config: Arc, _poller: tokio::task::JoinHandle<()>, log: Logger, } impl DumpSetup { - pub fn new(log: &Logger, mount_config: MountConfig) -> Self { + pub fn new(log: &Logger, mount_config: Arc) -> Self { let (tx, rx) = tokio::sync::mpsc::channel(16); let worker = DumpSetupWorker::new( Box::new(RealCoreDumpAdm {}), @@ -231,7 +244,7 @@ impl DumpSetup { /// This function returns only once this request has been handled, which /// can be used as a signal by callers that any "old disks" are no longer /// being used by [DumpSetup]. - pub(crate) async fn update_dumpdev_setup( + pub async fn update_dumpdev_setup( &self, disks: impl Iterator, ) { @@ -512,13 +525,11 @@ fn safe_to_delete(path: &Utf8Path, meta: &std::fs::Metadata) -> bool { return false; }; // Ignore support bundles - if file_name == crate::support_bundle::storage::BUNDLE_FILE_NAME { + if file_name == BUNDLE_FILE_NAME { return false; } // Ignore support bundle "temp files" as they're being created. 
- if file_name - .ends_with(crate::support_bundle::storage::BUNDLE_TMP_FILE_NAME_SUFFIX) - { + if file_name.ends_with(BUNDLE_TMP_FILE_NAME_SUFFIX) { return false; } return true; @@ -1273,7 +1284,6 @@ mod tests { use sled_storage::dataset::{CRASH_DATASET, DUMP_DATASET}; use std::collections::HashMap; use std::str::FromStr; - use tempfile::TempDir; use tokio::io::AsyncWriteExt; impl Clone for ZfsGetError { @@ -1400,7 +1410,7 @@ mod tests { // nothing when only a disk that's not ready let non_mounted_zpool = CoreZpool { - mount_config: MountConfig::default(), + mount_config: Arc::default(), name: ZpoolName::from_str(NOT_MOUNTED_INTERNAL).unwrap(), }; worker.update_disk_loadout(vec![], vec![], vec![non_mounted_zpool]); @@ -1419,16 +1429,17 @@ mod tests { const MOUNTED_INTERNAL: &str = "oxi_474e554e-6174-616c-6965-4e677579656e"; const ERROR_INTERNAL: &str = "oxi_4861636b-2054-6865-2050-6c616e657421"; + let mount_config = Arc::new(MountConfig::default()); let mounted_zpool = CoreZpool { - mount_config: MountConfig::default(), + mount_config: Arc::clone(&mount_config), name: ZpoolName::from_str(MOUNTED_INTERNAL).unwrap(), }; let non_mounted_zpool = CoreZpool { - mount_config: MountConfig::default(), + mount_config: Arc::clone(&mount_config), name: ZpoolName::from_str(NOT_MOUNTED_INTERNAL).unwrap(), }; let err_zpool = CoreZpool { - mount_config: MountConfig::default(), + mount_config, name: ZpoolName::from_str(ERROR_INTERNAL).unwrap(), }; const ZPOOL_MNT: &str = "/path/to/internal/zpool"; @@ -1494,12 +1505,9 @@ mod tests { // we make these so illumos_utils::dumpadm::dump_flag_is_valid returns what we want async fn populate_tempdir_with_fake_dumps( - tempdir: &TempDir, + tempdir: &Utf8TempDir, ) -> (DumpSlicePath, DumpSlicePath) { - let occupied = DumpSlicePath( - Utf8PathBuf::from_path_buf(tempdir.path().join("occupied.bin")) - .unwrap(), - ); + let occupied = DumpSlicePath(tempdir.path().join("occupied.bin")); let mut f = tokio::fs::File::create(occupied.as_ref()).await.unwrap(); f.write_all(&[0u8; DUMP_OFFSET as usize]).await.unwrap(); f.write_all(&DUMP_MAGIC.to_le_bytes()).await.unwrap(); @@ -1507,10 +1515,7 @@ mod tests { f.write_all(&DF_VALID.to_le_bytes()).await.unwrap(); drop(f); - let vacant = DumpSlicePath( - Utf8PathBuf::from_path_buf(tempdir.path().join("vacant.bin")) - .unwrap(), - ); + let vacant = DumpSlicePath(tempdir.path().join("vacant.bin")); let mut f = tokio::fs::File::create(vacant.as_ref()).await.unwrap(); f.write_all(&[0u8; DUMP_OFFSET as usize]).await.unwrap(); f.write_all(&DUMP_MAGIC.to_le_bytes()).await.unwrap(); @@ -1535,7 +1540,7 @@ mod tests { logctx.log.clone(), tokio::sync::mpsc::channel(1).1, ); - let tempdir = TempDir::new().unwrap(); + let tempdir = Utf8TempDir::new().unwrap(); let (occupied, _) = populate_tempdir_with_fake_dumps(&tempdir).await; worker.update_disk_loadout( @@ -1563,7 +1568,7 @@ mod tests { logctx.log.clone(), tokio::sync::mpsc::channel(1).1, ); - let tempdir = TempDir::new().unwrap(); + let tempdir = Utf8TempDir::new().unwrap(); let (occupied, vacant) = populate_tempdir_with_fake_dumps(&tempdir).await; worker.update_disk_loadout( @@ -1607,11 +1612,11 @@ mod tests { logctx.log.clone(), tokio::sync::mpsc::channel(1).1, ); - let tempdir = TempDir::new().unwrap(); + let tempdir = Utf8TempDir::new().unwrap(); let (occupied, _) = populate_tempdir_with_fake_dumps(&tempdir).await; let mounted_zpool = DebugZpool { - mount_config: MountConfig::default(), + mount_config: Arc::default(), name: ZpoolName::from_str(MOUNTED_EXTERNAL).unwrap(), }; 
worker.update_disk_loadout( @@ -1635,12 +1640,12 @@ mod tests { "test_archives_rotated_logs_and_cores", ); - let tempdir = TempDir::new().unwrap(); + let tempdir = Utf8TempDir::new().unwrap(); let core_dir = tempdir.path().join(CRASH_DATASET); let debug_dir = tempdir.path().join(DUMP_DATASET); let zone_logs = tempdir.path().join("root/var/svc/log"); - let tempdir_path = tempdir.path().to_str().unwrap().to_string(); + let tempdir_path = tempdir.path().as_str().to_string(); let zone = Zone::from_str(&format!( "1:myzone:running:{tempdir_path}::ipkg:shared" )) @@ -1700,12 +1705,13 @@ mod tests { .await .expect("writing fake core"); + let mount_config = Arc::new(MountConfig::default()); let mounted_core_zpool = CoreZpool { - mount_config: MountConfig::default(), + mount_config: Arc::clone(&mount_config), name: ZpoolName::from_str(MOUNTED_INTERNAL).unwrap(), }; let mounted_debug_zpool = DebugZpool { - mount_config: MountConfig::default(), + mount_config, name: ZpoolName::from_str(MOUNTED_EXTERNAL).unwrap(), }; @@ -1720,7 +1726,7 @@ mod tests { // it'll be renamed to use an epoch timestamp instead of .0 let log_glob = debug_dir.join(zone.name()).join(LOG_NAME.replace(".0", ".*")); - assert_eq!(glob::glob(log_glob.to_str().unwrap()).unwrap().count(), 1); + assert_eq!(glob::glob(log_glob.as_str()).unwrap().count(), 1); assert!(!zone_logs.join(LOG_NAME).is_file()); assert!(debug_dir.join(CORE_NAME).is_file()); assert!(!core_dir.join(CORE_NAME).is_file()); @@ -1819,7 +1825,7 @@ mod tests { ); let mounted_debug_zpool = DebugZpool { - mount_config: MountConfig::default(), + mount_config: Arc::default(), name: ZpoolName::from_str(MOUNTED_EXTERNAL).unwrap(), }; diff --git a/sled-agent/config-reconciler/src/external_disks.rs b/sled-agent/config-reconciler/src/external_disks.rs new file mode 100644 index 00000000000..7be716e2709 --- /dev/null +++ b/sled-agent/config-reconciler/src/external_disks.rs @@ -0,0 +1,3 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. diff --git a/sled-agent/config-reconciler/src/handle.rs b/sled-agent/config-reconciler/src/handle.rs new file mode 100644 index 00000000000..272f53a8831 --- /dev/null +++ b/sled-agent/config-reconciler/src/handle.rs @@ -0,0 +1,417 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+
+use camino::Utf8PathBuf;
+use illumos_utils::dladm::EtherstubVnic;
+use illumos_utils::zpool::PathInPool;
+use key_manager::StorageKeyRequester;
+use nexus_sled_agent_shared::inventory::InventoryDataset;
+use nexus_sled_agent_shared::inventory::InventoryDisk;
+use nexus_sled_agent_shared::inventory::InventoryZpool;
+use nexus_sled_agent_shared::inventory::OmicronSledConfig;
+use omicron_common::disk::DiskIdentity;
+use sled_agent_api::ArtifactConfig;
+use sled_storage::config::MountConfig;
+use sled_storage::manager::NestedDatasetConfig;
+use sled_storage::manager::NestedDatasetListOptions;
+use sled_storage::manager::NestedDatasetLocation;
+use slog::Logger;
+use slog::warn;
+use std::sync::Arc;
+use std::sync::Mutex;
+use std::sync::OnceLock;
+use tokio::sync::watch;
+
+#[cfg(feature = "testing")]
+use camino_tempfile::Utf8TempDir;
+#[cfg(feature = "testing")]
+use illumos_utils::zpool::ZpoolName;
+#[cfg(feature = "testing")]
+use illumos_utils::zpool::ZpoolOrRamdisk;
+#[cfg(feature = "testing")]
+use sled_storage::dataset::U2_DEBUG_DATASET;
+#[cfg(feature = "testing")]
+use sled_storage::dataset::ZONE_DATASET;
+
+use crate::DatasetTaskError;
+use crate::LedgerArtifactConfigError;
+use crate::LedgerNewConfigError;
+use crate::LedgerTaskError;
+use crate::SledAgentArtifactStore;
+use crate::SledAgentFacilities;
+use crate::TimeSyncStatus;
+use crate::dataset_serialization_task::DatasetTaskHandle;
+use crate::internal_disks::InternalDisksReceiver;
+use crate::ledger::LedgerTaskHandle;
+use crate::raw_disks;
+use crate::raw_disks::RawDisksSender;
+use crate::reconciler_task;
+use crate::reconciler_task::CurrentlyManagedZpools;
+use crate::reconciler_task::CurrentlyManagedZpoolsReceiver;
+use crate::reconciler_task::ReconcilerResult;
+
+#[derive(Debug, Clone, Copy)]
+pub enum TimeSyncConfig {
+    // Waits for NTP to confirm that time has been synchronized.
+    Normal,
+    // Skips timesync unconditionally.
+    Skip,
+}
+
+#[derive(Debug)]
+pub struct ConfigReconcilerHandle {
+    raw_disks_tx: RawDisksSender,
+    internal_disks_rx: InternalDisksReceiver,
+    dataset_task: DatasetTaskHandle,
+    reconciler_result_rx: watch::Receiver<ReconcilerResult>,
+    currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver,
+
+    // `None` until `spawn_reconciliation_task()` is called.
+    ledger_task: OnceLock<LedgerTaskHandle>,
+
+    // We have a two-phase initialization: we get some of our dependencies when
+    // `Self::new()` is called and the rest when
+    // `Self::spawn_reconciliation_task()` is called. We hold the dependencies
+    // that are available in `new` but not needed until
+    // `spawn_reconciliation_task` in this field.
+    reconciler_task_dependencies: Mutex<Option<ReconcilerTaskDependencies>>,
+}
+
+impl ConfigReconcilerHandle {
+    /// Create a `ConfigReconcilerHandle` and spawn many of the early-sled-agent
+    /// background tasks (e.g., managing internal disks).
+    ///
+    /// The config reconciler subsystem splits initialization into two phases:
+    /// the main reconciliation task will not be spawned until
+    /// `spawn_reconciliation_task()` is called on the returned handle.
+    /// `spawn_reconciliation_task()` cannot be called by sled-agent proper
+    /// until rack setup has occurred (or sled-agent has found its config from a
+    /// prior rack setup, during a cold boot).
+    pub fn new(
+        mount_config: MountConfig,
+        key_requester: StorageKeyRequester,
+        time_sync_config: TimeSyncConfig,
+        base_log: &Logger,
+    ) -> Self {
+        let mount_config = Arc::new(mount_config);
+
+        // Spawn the task that monitors our internal disks (M.2s).
+ let (raw_disks_tx, raw_disks_rx) = raw_disks::new(); + let internal_disks_rx = + InternalDisksReceiver::spawn_internal_disks_task( + Arc::clone(&mount_config), + raw_disks_rx, + base_log, + ); + + // Spawn the task that serializes dataset operations. + let dataset_task = DatasetTaskHandle::spawn_dataset_task( + Arc::clone(&mount_config), + base_log, + ); + + // Stash the dependencies the reconciler task will need in + // `spawn_reconciliation_task()`. + let (reconciler_result_tx, reconciler_result_rx) = + watch::channel(ReconcilerResult::default()); + let (currently_managed_zpools_tx, currently_managed_zpools_rx) = + watch::channel(Arc::default()); + let reconciler_task_dependencies = + Mutex::new(Some(ReconcilerTaskDependencies { + key_requester, + time_sync_config, + reconciler_result_tx, + currently_managed_zpools_tx, + ledger_task_log: base_log + .new(slog::o!("component" => "SledConfigLedgerTask")), + reconciler_task_log: base_log + .new(slog::o!("component" => "ConfigReconcilerTask")), + })); + + Self { + raw_disks_tx, + internal_disks_rx, + dataset_task, + ledger_task: OnceLock::new(), + reconciler_result_rx, + currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver::new( + currently_managed_zpools_rx, + ), + reconciler_task_dependencies, + } + } + + /// Spawn the primary config reconciliation task. + /// + /// This method can effectively only be called once; any subsequent calls + /// will log a warning and do nothing. + pub fn spawn_reconciliation_task< + T: SledAgentFacilities, + U: SledAgentArtifactStore, + >( + &self, + underlay_vnic: EtherstubVnic, + sled_agent_facilities: T, + sled_agent_artifact_store: U, + log: &Logger, + ) { + let ReconcilerTaskDependencies { + key_requester, + time_sync_config, + reconciler_result_tx, + currently_managed_zpools_tx, + ledger_task_log, + reconciler_task_log, + } = match self.reconciler_task_dependencies.lock().unwrap().take() { + Some(dependencies) => dependencies, + None => { + warn!( + log, + "spawn_reconciliation_task() called multiple times \ + (ignored after first call)" + ); + return; + } + }; + + // Spawn the task that manages our config ledger. + let (ledger_task, current_config_rx) = + LedgerTaskHandle::spawn_ledger_task( + self.internal_disks_rx.clone(), + sled_agent_artifact_store, + ledger_task_log, + ); + match self.ledger_task.set(ledger_task) { + Ok(()) => (), + // We know via the `lock().take()` above that this only executes + // once, so `ledger_task` is always set here. + Err(_) => { + unreachable!("spawn_reconciliation_task() only executes once") + } + } + + reconciler_task::spawn( + key_requester, + time_sync_config, + underlay_vnic, + current_config_rx, + reconciler_result_tx, + currently_managed_zpools_tx, + sled_agent_facilities, + reconciler_task_log, + ); + } + + /// Get the current timesync status. + pub fn timesync_status(&self) -> TimeSyncStatus { + self.reconciler_result_rx.borrow().timesync_status() + } + + /// Get a handle to update the set of raw disks visible to sled-agent. + pub fn raw_disks_tx(&self) -> RawDisksSender { + self.raw_disks_tx.clone() + } + + /// Get a watch channel to receive changes to the set of managed internal + /// disks. + pub fn internal_disks_rx(&self) -> &InternalDisksReceiver { + &self.internal_disks_rx + } + + /// Get a watch channel to receive changes to the set of available external + /// disk datasets. 
+ pub fn available_datasets_rx(&self) -> AvailableDatasetsReceiver { + AvailableDatasetsReceiver { + inner: AvailableDatasetsReceiverInner::Real( + self.reconciler_result_rx.clone(), + ), + } + } + + /// Get a watch channel to receive changes to the set of managed zpools. + pub fn currently_managed_zpools_rx( + &self, + ) -> &CurrentlyManagedZpoolsReceiver { + &self.currently_managed_zpools_rx + } + + /// Wait for the internal disks task to start managing the boot disk. + pub async fn wait_for_boot_disk(&mut self) -> DiskIdentity { + self.internal_disks_rx.wait_for_boot_disk().await + } + + /// Ensure a nested dataset is mounted. + pub async fn nested_dataset_ensure_mounted( + &self, + dataset: NestedDatasetLocation, + ) -> Result { + self.dataset_task.nested_dataset_ensure_mounted(dataset).await + } + + /// Ensure the existence of a nested dataset. + pub async fn nested_dataset_ensure( + &self, + config: NestedDatasetConfig, + ) -> Result<(), DatasetTaskError> { + self.dataset_task.nested_dataset_ensure(config).await + } + + /// Destroy a nested dataset. + pub async fn nested_dataset_destroy( + &self, + name: NestedDatasetLocation, + ) -> Result<(), DatasetTaskError> { + self.dataset_task.nested_dataset_destroy(name).await + } + + /// List a set of nested datasets. + pub async fn nested_dataset_list( + &self, + name: NestedDatasetLocation, + options: NestedDatasetListOptions, + ) -> Result, DatasetTaskError> { + self.dataset_task.nested_dataset_list(name, options).await + } + + /// Write a new sled config to the ledger. + pub async fn set_sled_config( + &self, + new_config: OmicronSledConfig, + ) -> Result, LedgerTaskError> { + self.ledger_task + .get() + .ok_or(LedgerTaskError::NotYetStarted)? + .set_new_config(new_config) + .await + } + + /// Validate that a new artifact config is legal (i.e., it doesn't remove + /// any artifact hashes in use by the currently-ledgered sled config). + pub async fn validate_artifact_config( + &self, + new_config: ArtifactConfig, + ) -> Result, LedgerTaskError> { + self.ledger_task + .get() + .ok_or(LedgerTaskError::NotYetStarted)? + .validate_artifact_config(new_config) + .await + } + + /// Collect inventory fields relevant to config reconciliation. 
+ pub fn inventory(&self) -> ReconcilerInventory { + unimplemented!() + } +} + +#[derive(Debug)] +struct ReconcilerTaskDependencies { + key_requester: StorageKeyRequester, + time_sync_config: TimeSyncConfig, + reconciler_result_tx: watch::Sender, + currently_managed_zpools_tx: watch::Sender>, + ledger_task_log: Logger, + reconciler_task_log: Logger, +} + +#[derive(Debug)] +pub struct ReconcilerInventory { + pub disks: Vec, + pub zpools: Vec, + pub datasets: Vec, + pub ledgered_sled_config: Option, +} + +#[derive(Debug, Clone)] +pub struct AvailableDatasetsReceiver { + inner: AvailableDatasetsReceiverInner, +} + +impl AvailableDatasetsReceiver { + #[cfg(feature = "testing")] + pub fn fake_in_tempdir_for_tests(zpool: ZpoolOrRamdisk) -> Self { + let tempdir = Arc::new(Utf8TempDir::new().expect("created temp dir")); + std::fs::create_dir_all(tempdir.path().join(U2_DEBUG_DATASET)) + .expect("created test debug dataset directory"); + std::fs::create_dir_all(tempdir.path().join(ZONE_DATASET)) + .expect("created test zone root dataset directory"); + Self { + inner: AvailableDatasetsReceiverInner::FakeTempDir { + zpool, + tempdir, + }, + } + } + + #[cfg(feature = "testing")] + pub fn fake_static( + pools: impl Iterator, + ) -> Self { + Self { + inner: AvailableDatasetsReceiverInner::FakeStatic(pools.collect()), + } + } + + pub fn all_mounted_debug_datasets(&self) -> Vec { + match &self.inner { + AvailableDatasetsReceiverInner::Real(receiver) => { + receiver.borrow().all_mounted_debug_datasets().collect() + } + #[cfg(feature = "testing")] + AvailableDatasetsReceiverInner::FakeTempDir { zpool, tempdir } => { + vec![PathInPool { + pool: zpool.clone(), + path: tempdir.path().join(U2_DEBUG_DATASET), + }] + } + #[cfg(feature = "testing")] + AvailableDatasetsReceiverInner::FakeStatic(pools) => pools + .iter() + .map(|(pool, path)| PathInPool { + pool: ZpoolOrRamdisk::Zpool(pool.clone()), + path: path.join(U2_DEBUG_DATASET), + }) + .collect(), + } + } + + pub fn all_mounted_zone_root_datasets(&self) -> Vec { + match &self.inner { + AvailableDatasetsReceiverInner::Real(receiver) => { + receiver.borrow().all_mounted_zone_root_datasets().collect() + } + #[cfg(feature = "testing")] + AvailableDatasetsReceiverInner::FakeTempDir { zpool, tempdir } => { + vec![PathInPool { + pool: zpool.clone(), + path: tempdir.path().join(ZONE_DATASET), + }] + } + #[cfg(feature = "testing")] + AvailableDatasetsReceiverInner::FakeStatic(pools) => pools + .iter() + .map(|(pool, path)| PathInPool { + pool: ZpoolOrRamdisk::Zpool(pool.clone()), + path: path.join(ZONE_DATASET), + }) + .collect(), + } + } +} + +#[derive(Debug, Clone)] +enum AvailableDatasetsReceiverInner { + // The production path: available datasets are based on the results of the + // most recent reconciliation result. + Real(watch::Receiver), + // Test path: allow tests to place datasets in a temp directory. + #[cfg(feature = "testing")] + FakeTempDir { + zpool: ZpoolOrRamdisk, + tempdir: Arc, + }, + // Test path: allow tests to specify their own directories. + #[cfg(feature = "testing")] + FakeStatic(Vec<(ZpoolName, Utf8PathBuf)>), +} diff --git a/sled-agent/config-reconciler/src/internal_disks.rs b/sled-agent/config-reconciler/src/internal_disks.rs new file mode 100644 index 00000000000..c63feff4441 --- /dev/null +++ b/sled-agent/config-reconciler/src/internal_disks.rs @@ -0,0 +1,309 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Tokio task responsible for starting management of our "internal" disks (on +//! gimlet/cosmo, M.2s). These disks are usable earlier than external disks, in +//! particular: +//! +//! * Before rack setup +//! * During sled-agent startup, before trust quorum has unlocked + +use camino::Utf8PathBuf; +use id_map::IdMap; +use id_map::IdMappable; +use omicron_common::disk::DiskIdentity; +use omicron_common::disk::M2Slot; +use omicron_common::zpool_name::ZpoolName; +use sled_hardware::PooledDiskError; +use sled_storage::config::MountConfig; +use sled_storage::dataset::CLUSTER_DATASET; +use sled_storage::dataset::CONFIG_DATASET; +use sled_storage::dataset::M2_ARTIFACT_DATASET; +use sled_storage::dataset::M2_DEBUG_DATASET; +use sled_storage::disk::Disk; +use slog::Logger; +use std::mem; +use std::sync::Arc; +use tokio::sync::watch; +use tokio::sync::watch::error::RecvError; + +use crate::dump_setup::DumpSetup; +use crate::raw_disks::RawDiskWithId; + +/// A thin wrapper around a [`watch::Receiver`] that presents a similar API. +#[derive(Debug, Clone)] +pub struct InternalDisksReceiver { + mount_config: Arc, + inner: InternalDisksReceiverInner, +} + +#[derive(Debug, Clone)] +enum InternalDisksReceiverInner { + Real(watch::Receiver>>), + #[cfg(feature = "testing")] + FakeStatic(Arc>), +} + +impl InternalDisksReceiver { + #[cfg(feature = "testing")] + pub fn fake_static( + mount_config: Arc, + disks: impl Iterator, + ) -> Self { + let inner = InternalDisksReceiverInner::FakeStatic(Arc::new( + disks + .map(|(identity, zpool_name)| InternalDiskDetails { + identity: Arc::new(identity), + zpool_name, + // We can expand the interface for fake disks if we need to + // be able to specify these fields in future tests. + is_boot_disk: false, + slot: None, + raw_devfs_path: None, + }) + .collect(), + )); + Self { mount_config, inner } + } + + pub(crate) fn spawn_internal_disks_task( + mount_config: Arc, + raw_disks_rx: watch::Receiver>, + base_log: &Logger, + ) -> Self { + let (disks_tx, disks_rx) = watch::channel(Arc::default()); + + tokio::spawn( + InternalDisksTask { + disks_tx, + raw_disks_rx, + mount_config: Arc::clone(&mount_config), + dump_setup: DumpSetup::new(base_log, Arc::clone(&mount_config)), + log: base_log.new(slog::o!("component" => "InternalDisksTask")), + } + .run(), + ); + + Self { mount_config, inner: InternalDisksReceiverInner::Real(disks_rx) } + } + + /// Get the current set of managed internal disks without marking the + /// returned value as seen. + /// + /// This is analogous to [`watch::Receiver::borrow()`], except it returns an + /// owned value that does not keep the internal watch lock held. + pub fn current(&self) -> InternalDisks { + let disks = match &self.inner { + InternalDisksReceiverInner::Real(rx) => Arc::clone(&*rx.borrow()), + #[cfg(feature = "testing")] + InternalDisksReceiverInner::FakeStatic(disks) => Arc::clone(disks), + }; + InternalDisks { disks, mount_config: Arc::clone(&self.mount_config) } + } + + /// Get the current set of managed internal disks and mark the returned + /// value as seen. + /// + /// This is analogous to [`watch::Receiver::borrow_and_update()`], except it + /// returns an owned value that does not keep the internal watch lock held. 
+ pub fn current_and_update(&mut self) -> InternalDisks { + let disks = match &mut self.inner { + InternalDisksReceiverInner::Real(rx) => { + Arc::clone(&*rx.borrow_and_update()) + } + #[cfg(feature = "testing")] + InternalDisksReceiverInner::FakeStatic(disks) => Arc::clone(disks), + }; + InternalDisks { disks, mount_config: Arc::clone(&self.mount_config) } + } + + /// Wait for changes to the set of managed internal disks. + pub async fn changed(&mut self) -> Result<(), RecvError> { + match &mut self.inner { + InternalDisksReceiverInner::Real(rx) => rx.changed().await, + #[cfg(feature = "testing")] + InternalDisksReceiverInner::FakeStatic(_) => { + // Static set of disks never changes + std::future::pending().await + } + } + } + + /// Wait until the boot disk is managed, returning its identity. + /// + /// Internally updates the most-recently-seen value. + pub(crate) async fn wait_for_boot_disk(&mut self) -> DiskIdentity { + let disks_rx = match &mut self.inner { + InternalDisksReceiverInner::Real(rx) => rx, + #[cfg(feature = "testing")] + InternalDisksReceiverInner::FakeStatic(disks) => { + if let Some(disk) = disks.iter().find(|d| d.is_boot_disk) { + return (*disk.identity).clone(); + } + panic!("fake InternalDisksReceiver has no boot disk") + } + }; + loop { + let disks = disks_rx.borrow_and_update(); + if let Some(disk) = disks.iter().find(|d| d.is_boot_disk) { + return (*disk.identity).clone(); + } + mem::drop(disks); + + disks_rx.changed().await.expect("InternalDisks task never dies"); + } + } +} + +pub struct InternalDisks { + disks: Arc>, + mount_config: Arc, +} + +impl InternalDisks { + pub fn mount_config(&self) -> &MountConfig { + &self.mount_config + } + + pub fn boot_disk_zpool(&self) -> Option<&ZpoolName> { + self.disks.iter().find_map(|d| { + if d.is_boot_disk { Some(&d.zpool_name) } else { None } + }) + } + + pub fn image_raw_devfs_path( + &self, + slot: M2Slot, + ) -> Option>> { + self.disks.iter().find_map(|disk| { + if disk.slot == Some(slot) { + disk.raw_devfs_path.clone() + } else { + None + } + }) + } + + /// Returns all `CONFIG_DATASET` paths within available M.2 disks. + pub fn all_config_datasets( + &self, + ) -> impl ExactSizeIterator + '_ { + self.all_datasets(CONFIG_DATASET) + } + + /// Returns all `M2_DEBUG_DATASET` paths within available M.2 disks. + pub fn all_debug_datasets( + &self, + ) -> impl ExactSizeIterator + '_ { + self.all_datasets(M2_DEBUG_DATASET) + } + + /// Returns all `CLUSTER_DATASET` paths within available M.2 disks. + pub fn all_cluster_datasets( + &self, + ) -> impl ExactSizeIterator + '_ { + self.all_datasets(CLUSTER_DATASET) + } + + /// Returns all `ARTIFACT_DATASET` paths within available M.2 disks. + pub fn all_artifact_datasets( + &self, + ) -> impl ExactSizeIterator + '_ { + self.all_datasets(M2_ARTIFACT_DATASET) + } + + /// Return the directories for storing zone service bundles. + pub fn all_zone_bundle_directories( + &self, + ) -> impl ExactSizeIterator + '_ { + // The directory within the debug dataset in which bundles are created. + const BUNDLE_DIRECTORY: &str = "bundle"; + + // The directory for zone bundles. 
+ const ZONE_BUNDLE_DIRECTORY: &str = "zone"; + + self.all_debug_datasets() + .map(|p| p.join(BUNDLE_DIRECTORY).join(ZONE_BUNDLE_DIRECTORY)) + } + + fn all_datasets( + &self, + dataset_name: &'static str, + ) -> impl ExactSizeIterator + '_ { + self.disks.iter().map(|disk| { + disk.zpool_name + .dataset_mountpoint(&self.mount_config.root, dataset_name) + }) + } +} + +// A summary of a [`Disk`] without providing any of the associated functionality. +#[derive(Debug)] +struct InternalDiskDetails { + identity: Arc, + zpool_name: ZpoolName, + is_boot_disk: bool, + + // These two fields are optional because they don't exist for synthetic + // disks. + slot: Option, + raw_devfs_path: Option>>, +} + +impl IdMappable for InternalDiskDetails { + type Id = (bool, Arc); + + fn id(&self) -> Self::Id { + // Using `!self.is_boot_disk` as the first part of the ID allows us to + // guarantee we always iterate over the internal disks with the current + // boot disk first. + // + // TODO-cleanup add tests for this + (!self.is_boot_disk, Arc::clone(&self.identity)) + } +} + +impl From<&'_ Disk> for InternalDiskDetails { + fn from(disk: &'_ Disk) -> Self { + // Synthetic disks panic if asked for their `slot()`, so filter + // them out first. + let slot = if disk.is_synthetic() { + None + } else { + M2Slot::try_from(disk.slot()).ok() + }; + + // Same story for devfs path. + let raw_devfs_path = if disk.is_synthetic() { + None + } else { + Some(disk.boot_image_devfs_path(true).map_err(Arc::new)) + }; + + Self { + identity: Arc::new(disk.identity().clone()), + zpool_name: disk.zpool_name().clone(), + is_boot_disk: disk.is_boot_disk(), + slot, + raw_devfs_path, + } + } +} + +struct InternalDisksTask { + disks_tx: watch::Sender>>, + raw_disks_rx: watch::Receiver>, + mount_config: Arc, + + // Invokes dumpadm(8) and savecore(8) when new disks are encountered + dump_setup: DumpSetup, + + log: Logger, +} + +impl InternalDisksTask { + async fn run(self) { + unimplemented!() + } +} diff --git a/sled-agent/config-reconciler/src/ledger.rs b/sled-agent/config-reconciler/src/ledger.rs new file mode 100644 index 00000000000..9a55c476b1f --- /dev/null +++ b/sled-agent/config-reconciler/src/ledger.rs @@ -0,0 +1,172 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Tokio task responsible for managing the [`OmicronSledConfig`] ledger. 
+ +use dropshot::HttpError; +use nexus_sled_agent_shared::inventory::OmicronSledConfig; +use omicron_common::api::external::Generation; +use omicron_common::ledger; +use sled_agent_api::ArtifactConfig; +use slog::Logger; +use slog_error_chain::InlineErrorChain; +use std::collections::BTreeMap; +use tokio::sync::mpsc; +use tokio::sync::oneshot; +use tokio::sync::watch; +use tufaceous_artifact::ArtifactHash; + +use crate::InternalDisksReceiver; +use crate::SledAgentArtifactStore; + +#[derive(Debug, thiserror::Error)] +pub enum LedgerTaskError { + #[error("ledger task has not been started yet")] + NotYetStarted, + #[error("ledger task busy; cannot service new requests")] + Busy, + #[error("internal error: ledger task exited!")] + Exited, +} + +impl From for HttpError { + fn from(err: LedgerTaskError) -> Self { + let message = InlineErrorChain::new(&err).to_string(); + match err { + LedgerTaskError::NotYetStarted | LedgerTaskError::Busy => { + HttpError::for_unavail(None, message) + } + LedgerTaskError::Exited => HttpError::for_internal_error(message), + } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum LedgerNewConfigError { + #[error("cannot write sled config ledger: no M.2 disks available")] + NoM2Disks, + #[error("cannot accept sled config: waiting for key manager")] + WaitingForKeyManager, + #[error( + "sled config generation out of date (got {requested}, have {current})" + )] + GenerationOutdated { current: Generation, requested: Generation }, + #[error("sled config changed with the same generation ({generation})")] + ConfigurationChanged { generation: Generation }, + #[error("failed to commit sled config to ledger")] + LedgerCommitFailed(#[source] ledger::Error), +} + +impl From for HttpError { + fn from(err: LedgerNewConfigError) -> Self { + let message = InlineErrorChain::new(&err).to_string(); + match err { + LedgerNewConfigError::NoM2Disks + | LedgerNewConfigError::WaitingForKeyManager => { + HttpError::for_unavail(None, message) + } + LedgerNewConfigError::GenerationOutdated { .. } + | LedgerNewConfigError::ConfigurationChanged { .. } => { + HttpError::for_bad_request(None, message) + } + LedgerNewConfigError::LedgerCommitFailed(_) => { + HttpError::for_internal_error(message) + } + } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum LedgerArtifactConfigError { + #[error( + "Artifacts in use by ledgered sled config are not present \ + in new artifact config: {0:?}" + )] + InUseArtifactedMissing(BTreeMap), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum CurrentSledConfig { + /// We're still waiting on the M.2 drives to be found: We don't yet know + /// whether we have a ledgered config, nor would we be able to write one. + WaitingForInternalDisks, + /// We have at least one M.2 drive, but there is no ledgered config: we're + /// waiting for our initial config (from RSS, if the rack hasn't been set up + /// yet, or from Nexus if we're a newly-added sled). + WaitingForInitialConfig, + /// We have a ledgered config. + Ledgered(OmicronSledConfig), +} + +#[derive(Debug)] +pub(crate) struct LedgerTaskHandle { + request_tx: mpsc::Sender, +} + +impl LedgerTaskHandle { + pub fn spawn_ledger_task( + internal_disks_rx: InternalDisksReceiver, + artifact_store: T, + log: Logger, + ) -> (Self, watch::Receiver) { + // TODO pick and explain a channel size + let (request_tx, request_rx) = mpsc::channel(8); + + // We always start in the "waiting for internal disks" state. 
The + // internal disk management task is started more or less concurrently + // with us, so we won't stay in this state for long unless something is + // very wrong. + let (current_config_tx, current_config_rx) = + watch::channel(CurrentSledConfig::WaitingForInternalDisks); + + tokio::spawn( + LedgerTask { + artifact_store, + request_rx, + internal_disks_rx, + current_config_tx, + log, + } + .run(), + ); + + (Self { request_tx }, current_config_rx) + } + + pub async fn set_new_config( + &self, + _new_config: OmicronSledConfig, + ) -> Result, LedgerTaskError> { + unimplemented!() + } + + pub async fn validate_artifact_config( + &self, + _new_config: ArtifactConfig, + ) -> Result, LedgerTaskError> { + unimplemented!() + } +} + +#[derive(Debug)] +enum LedgerTaskRequest { + WriteNewConfig { + new_config: OmicronSledConfig, + tx: oneshot::Sender>, + }, +} + +struct LedgerTask { + artifact_store: T, + request_rx: mpsc::Receiver, + internal_disks_rx: InternalDisksReceiver, + current_config_tx: watch::Sender, + log: Logger, +} + +impl LedgerTask { + async fn run(self) { + unimplemented!() + } +} diff --git a/sled-agent/config-reconciler/src/lib.rs b/sled-agent/config-reconciler/src/lib.rs new file mode 100644 index 00000000000..01ab8e0ed39 --- /dev/null +++ b/sled-agent/config-reconciler/src/lib.rs @@ -0,0 +1,80 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Machinery for sled-agent to reconcile available hardware resources with the +//! Nexus-provided `OmicronSledConfig` describing the set of disks, datasets, +//! and Omicron zones that should be in service. +//! +//! The initial entry point to this system is [`ConfigReconcilerHandle::new()`]. +//! This should be called early in sled-agent startup. Later during the +//! sled-agent start process, once sled-agent has gotten its top-level +//! configuration (either from RSS, if going through rack setup, or by reading +//! from the internal disks, if cold booting), sled-agent should call +//! [`ConfigReconcilerHandle::spawn_reconciliation_task()`]. These two calls +//! will spawn several tokio tasks that run for the duration of the sled-agent +//! process: +//! +//! * A task for managing internal disks (in the `internal_disks` module of this +//! crate). This task takes raw disks as input over a watch channel, and emits +//! managed internal disks as output on another watch channel. Sled-agent +//! components that care about internal disks can get access to this output +//! channel via [`ConfigReconcilerHandle::internal_disks_rx()`]. On Gimlet and +//! Cosmo, "internal" disks have the M.2 form factor; much existing code will +//! refer to them as `m2` disks, but the form factor may change in future +//! sleds. The term "internal disks" is for "the disks that are not easily +//! swappable and can be managed before trust quorum has unlocked"; we use +//! them to store ledgers, etc. +//! * A task for serializing ZFS operations on datasets (in the +//! `dataset_serialization_task` module of this crate). This task takes +//! requests over an `mpsc` channel. This channel is not exposed directly; +//! instead, various operations that go through this task are implemented as +//! methods on `ConfigReconcilerHandle` (e.g., +//! [`ConfigReconcilerHandle::nested_dataset_ensure_mounted()]). +//! * A task for managing the ledgered `OmicronSledConfig` for this sled. This +//! 
task takes requests over an `mpsc` channel, and also emits the state of +//! the currently-ledgered config over a watch channel. This watch channel is +//! not directly exposed outside this crate, but is indirectly accessible via +//! specific operations like [`ConfigReconcilerHandle::inventory()`]. +//! * A task that performs reconciliation of all of the above tasks' outputs and +//! the current `OmicronSledConfig`. This task does the actual work to manage +//! external (in Gimlet and Cosmo, U.2 form factor) disks, datasets on +//! external disks, and Omicron service zones. It emits its results on a watch +//! channel. This watch channel is not exposed directly, but specific views of +//! it are available via methods like +//! [`ConfigReconcilerHandle::timesync_status()`] and +//! [`ConfigReconcilerHandle::inventory()`]. + +// TODO-cleanup Remove once we've filled in all the `unimplemented!()`s that +// will make use of various arguments and fields. +#![allow(dead_code)] + +mod dataset_serialization_task; +mod external_disks; +mod handle; +mod internal_disks; +mod ledger; +mod raw_disks; +mod reconciler_task; +mod sled_agent_facilities; + +// TODO-cleanup Make this private once the reconciler uses it instead of +// sled-agent proper. +pub mod dump_setup; + +pub use dataset_serialization_task::DatasetTaskError; +pub use handle::AvailableDatasetsReceiver; +pub use handle::ConfigReconcilerHandle; +pub use handle::ReconcilerInventory; +pub use handle::TimeSyncConfig; +pub use internal_disks::InternalDisks; +pub use internal_disks::InternalDisksReceiver; +pub use ledger::LedgerArtifactConfigError; +pub use ledger::LedgerNewConfigError; +pub use ledger::LedgerTaskError; +pub use raw_disks::RawDisksSender; +pub use reconciler_task::CurrentlyManagedZpools; +pub use reconciler_task::CurrentlyManagedZpoolsReceiver; +pub use reconciler_task::TimeSyncStatus; +pub use sled_agent_facilities::SledAgentArtifactStore; +pub use sled_agent_facilities::SledAgentFacilities; diff --git a/sled-agent/config-reconciler/src/raw_disks.rs b/sled-agent/config-reconciler/src/raw_disks.rs new file mode 100644 index 00000000000..3ba49e0a5c6 --- /dev/null +++ b/sled-agent/config-reconciler/src/raw_disks.rs @@ -0,0 +1,80 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Provides thin wrappers around a watch channel managing the set of +//! [`RawDisk`]s sled-agent is aware of. + +use id_map::IdMap; +use id_map::IdMappable; +use omicron_common::disk::DiskIdentity; +use sled_storage::disk::RawDisk; +use slog::Logger; +use std::ops::Deref; +use std::sync::Arc; +use tokio::sync::watch; + +pub(crate) fn new() -> (RawDisksSender, watch::Receiver>) { + let (tx, rx) = watch::channel(IdMap::default()); + (RawDisksSender(tx), rx) +} + +#[derive(Debug, Clone)] +pub struct RawDisksSender(watch::Sender>); + +impl RawDisksSender { + /// Set the complete set of raw disks visible to sled-agent. + pub fn set_raw_disks(&self, _raw_disks: I, _log: &Logger) + where + I: Iterator, + { + unimplemented!() + } + + /// Add or update the properties of a raw disk visible to sled-agent. + pub fn add_or_update_raw_disk( + &self, + _disk: RawDisk, + _log: &Logger, + ) -> bool { + unimplemented!() + } + + /// Remove a raw disk that is no longer visible to sled-agent. 
+ pub fn remove_raw_disk( + &self, + _identity: &DiskIdentity, + _log: &Logger, + ) -> bool { + unimplemented!() + } +} + +// Adapter to store `RawDisk` in an `IdMap` with cheap key cloning. +#[derive(Debug, PartialEq, Eq)] +pub(crate) struct RawDiskWithId { + identity: Arc, + disk: RawDisk, +} + +impl IdMappable for RawDiskWithId { + type Id = Arc; + + fn id(&self) -> Self::Id { + Arc::clone(&self.identity) + } +} + +impl From for RawDiskWithId { + fn from(disk: RawDisk) -> Self { + Self { identity: Arc::new(disk.identity().clone()), disk } + } +} + +impl Deref for RawDiskWithId { + type Target = RawDisk; + + fn deref(&self) -> &Self::Target { + &self.disk + } +} diff --git a/sled-agent/config-reconciler/src/reconciler_task.rs b/sled-agent/config-reconciler/src/reconciler_task.rs new file mode 100644 index 00000000000..33c9645f474 --- /dev/null +++ b/sled-agent/config-reconciler/src/reconciler_task.rs @@ -0,0 +1,246 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! The primary task for sled config reconciliation. + +use chrono::DateTime; +use chrono::Utc; +use illumos_utils::dladm::EtherstubVnic; +use illumos_utils::running_zone::RunCommandError; +use illumos_utils::zpool::PathInPool; +use illumos_utils::zpool::ZpoolName; +use key_manager::StorageKeyRequester; +use nexus_sled_agent_shared::inventory::OmicronSledConfig; +use sled_agent_types::time_sync::TimeSync; +use slog::Logger; +use std::collections::BTreeSet; +use std::sync::Arc; +use std::time::Duration; +use std::time::Instant; +use tokio::sync::watch; +use tokio::sync::watch::error::RecvError; + +use crate::TimeSyncConfig; +use crate::ledger::CurrentSledConfig; +use crate::sled_agent_facilities::SledAgentFacilities; + +#[allow(clippy::too_many_arguments)] +pub(crate) fn spawn( + key_requester: StorageKeyRequester, + time_sync_config: TimeSyncConfig, + underlay_vnic: EtherstubVnic, + current_config_rx: watch::Receiver, + reconciler_result_tx: watch::Sender, + currently_managed_zpools_tx: watch::Sender>, + sled_agent_facilities: T, + log: Logger, +) { + tokio::spawn( + ReconcilerTask { + key_requester, + time_sync_config, + underlay_vnic, + current_config_rx, + reconciler_result_tx, + currently_managed_zpools_tx, + sled_agent_facilities, + log, + } + .run(), + ); +} + +#[derive(Debug, Clone)] +pub(crate) struct ReconcilerResult { + status: ReconcilerTaskStatus, + latest_result: Option>, +} + +impl Default for ReconcilerResult { + fn default() -> Self { + Self { + status: ReconcilerTaskStatus::NotYetRunning, + latest_result: None, + } + } +} + +impl ReconcilerResult { + pub fn timesync_status(&self) -> TimeSyncStatus { + self.latest_result + .as_deref() + .map(|inner| inner.timesync_status.clone()) + .unwrap_or(TimeSyncStatus::NotYetChecked) + } + + pub fn all_mounted_debug_datasets( + &self, + ) -> impl Iterator + '_ { + // unimplemented!() doesn't work with `-> impl Iterator` + if 1 > 0 { + panic!("unimplemented!"); + } + std::iter::empty() + } + + pub fn all_mounted_zone_root_datasets( + &self, + ) -> impl Iterator + '_ { + // unimplemented!() doesn't work with `-> impl Iterator` + if 1 > 0 { + panic!("unimplemented!"); + } + std::iter::empty() + } +} + +#[derive(Debug, Clone)] +pub enum ReconcilerTaskStatus { + NotYetRunning, + WaitingForInternalDisks, + WaitingForRackSetup, + PerformingReconciliation { + config: OmicronSledConfig, + started_at_time: DateTime, + 
started_at_instant: Instant, + }, + Idle { + completed_at_time: DateTime, + ran_for: Duration, + }, +} + +#[derive(Debug, Clone)] +pub enum TimeSyncStatus { + NotYetChecked, + ConfiguredToSkip, + FailedToGetSyncStatus(Arc), + TimeSync(TimeSync), +} + +#[derive(Debug, thiserror::Error)] +pub enum TimeSyncError { + #[error("no running NTP zone")] + NoRunningNtpZone, + #[error("multiple running NTP zones - this should never happen!")] + MultipleRunningNtpZones, + #[error("failed to execute chronyc within NTP zone")] + ExecuteChronyc(#[source] RunCommandError), + #[error( + "failed to parse chronyc tracking output: {reason} (stdout: {stdout:?})" + )] + FailedToParse { reason: &'static str, stdout: String }, +} + +#[derive(Debug, Clone)] +pub struct CurrentlyManagedZpoolsReceiver { + inner: CurrentlyManagedZpoolsReceiverInner, +} + +#[derive(Debug, Clone)] +enum CurrentlyManagedZpoolsReceiverInner { + Real(watch::Receiver>), + #[cfg(feature = "testing")] + FakeStatic(BTreeSet), +} + +impl CurrentlyManagedZpoolsReceiver { + #[cfg(feature = "testing")] + pub fn fake_static(zpools: impl Iterator) -> Self { + Self { + inner: CurrentlyManagedZpoolsReceiverInner::FakeStatic( + zpools.collect(), + ), + } + } + + pub(crate) fn new( + rx: watch::Receiver>, + ) -> Self { + Self { inner: CurrentlyManagedZpoolsReceiverInner::Real(rx) } + } + + pub fn current(&self) -> Arc { + match &self.inner { + CurrentlyManagedZpoolsReceiverInner::Real(rx) => { + Arc::clone(&*rx.borrow()) + } + #[cfg(feature = "testing")] + CurrentlyManagedZpoolsReceiverInner::FakeStatic(zpools) => { + Arc::new(CurrentlyManagedZpools(zpools.clone())) + } + } + } + + pub fn current_and_update(&mut self) -> Arc { + match &mut self.inner { + CurrentlyManagedZpoolsReceiverInner::Real(rx) => { + Arc::clone(&*rx.borrow_and_update()) + } + #[cfg(feature = "testing")] + CurrentlyManagedZpoolsReceiverInner::FakeStatic(zpools) => { + Arc::new(CurrentlyManagedZpools(zpools.clone())) + } + } + } + + /// Wait for changes in the underlying watch channel. + /// + /// Cancel-safe. + pub async fn changed(&mut self) -> Result<(), RecvError> { + match &mut self.inner { + CurrentlyManagedZpoolsReceiverInner::Real(rx) => rx.changed().await, + #[cfg(feature = "testing")] + CurrentlyManagedZpoolsReceiverInner::FakeStatic(_) => { + // Static set of zpools never changes + std::future::pending().await + } + } + } +} + +/// Set of currently managed zpools. +/// +/// This handle should only be used to decide to _stop_ using a zpool (e.g., if +/// a previously-launched zone is on a zpool that is no longer managed). It does +/// not expose a means to list or choose from the currently-managed pools; +/// instead, consumers should choose mounted datasets. +/// +/// This level of abstraction even for "when to stop using a zpool" is probably +/// wrong: if we choose a dataset on which to place a zone's root, we should +/// shut that zone down if the _dataset_ goes away, not the zpool. For now we +/// live with "assume the dataset bases we choose stick around as long as their +/// parent zpool does". +#[derive(Default, Debug, Clone)] +pub struct CurrentlyManagedZpools(BTreeSet); + +impl CurrentlyManagedZpools { + /// Returns true if `zpool` is currently managed. 
+    pub fn contains(&self, zpool: &ZpoolName) -> bool {
+        self.0.contains(zpool)
+    }
+}
+
+#[derive(Debug)]
+struct LatestReconcilerTaskResultInner {
+    sled_config: OmicronSledConfig,
+    timesync_status: TimeSyncStatus,
+}
+
+struct ReconcilerTask<T: SledAgentFacilities> {
+    key_requester: StorageKeyRequester,
+    time_sync_config: TimeSyncConfig,
+    underlay_vnic: EtherstubVnic,
+    current_config_rx: watch::Receiver<CurrentSledConfig>,
+    reconciler_result_tx: watch::Sender<ReconcilerResult>,
+    currently_managed_zpools_tx: watch::Sender<Arc<CurrentlyManagedZpools>>,
+    sled_agent_facilities: T,
+    log: Logger,
+}
+
+impl<T: SledAgentFacilities> ReconcilerTask<T> {
+    async fn run(self) {
+        unimplemented!()
+    }
+}
diff --git a/sled-agent/config-reconciler/src/sled_agent_facilities.rs b/sled-agent/config-reconciler/src/sled_agent_facilities.rs
new file mode 100644
index 00000000000..36dc0c47983
--- /dev/null
+++ b/sled-agent/config-reconciler/src/sled_agent_facilities.rs
@@ -0,0 +1,69 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Traits that allow `sled-agent-config-reconciler` to be a separate crate but
+//! still use facilities implemented in `sled-agent` proper.
+
+use illumos_utils::running_zone::RunningZone;
+use illumos_utils::zpool::ZpoolName;
+use nexus_sled_agent_shared::inventory::OmicronZoneConfig;
+use omicron_common::address::Ipv6Subnet;
+use omicron_common::address::SLED_PREFIX;
+use sled_agent_types::zone_bundle::ZoneBundleCause;
+use sled_storage::config::MountConfig;
+use tufaceous_artifact::ArtifactHash;
+
+#[allow(async_fn_in_trait)] // TODO confirm this makes sense and explain
+pub trait SledAgentFacilities: Send + 'static {
+    type StartZoneError;
+    type MetricsUntrackZoneLinksError;
+    type ZoneBundleCreateError;
+
+    /// Called once time is synchronized.
+    // TODO-cleanup should we do this work ourselves instead?
+    async fn on_time_sync(&self);
+
+    /// Method to start a zone.
+    // TODO-cleanup This is implemented by
+    // `ServiceManager::start_omicron_zone()`, which does too much; we should
+    // absorb some of its functionality and shrink this interface. We definitely
+    // should not need to pass the full list of U2 zpools.
+    async fn start_omicron_zone(
+        &self,
+        zone_config: &OmicronZoneConfig,
+        mount_config: &MountConfig,
+        is_time_synchronized: bool,
+        all_u2_pools: &[ZpoolName],
+    ) -> Result<RunningZone, Self::StartZoneError>;
+
+    /// Stop tracking metrics for a zone's datalinks.
+    fn metrics_untrack_zone_links(
+        &self,
+        zone: &RunningZone,
+    ) -> Result<(), Self::MetricsUntrackZoneLinksError>;
+
+    /// Instruct DDM to start advertising a prefix.
+    fn ddm_add_internal_dns_prefix(&self, prefix: Ipv6Subnet<SLED_PREFIX>);
+
+    /// Instruct DDM to stop advertising a prefix.
+    fn ddm_remove_internal_dns_prefix(&self, prefix: Ipv6Subnet<SLED_PREFIX>);
+
+    /// Create a zone bundle.
+    async fn zone_bundle_create(
+        &self,
+        zone: &RunningZone,
+        cause: ZoneBundleCause,
+    ) -> Result<(), Self::ZoneBundleCreateError>;
+}
+
+#[allow(async_fn_in_trait)] // TODO confirm this makes sense and explain
+pub trait SledAgentArtifactStore: Send + 'static {
+    type ArtifactExistsValidationError;
+
+    /// Check an artifact exists in the TUF Repo Depot storage.
+ async fn validate_artifact_exists_in_storage( + &self, + artifact: ArtifactHash, + ) -> Result<(), Self::ArtifactExistsValidationError>; +} diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 1e783f0b517..3a63ff5c117 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -21,7 +21,6 @@ mod boot_disk_os_writer; pub mod bootstrap; pub mod config; mod ddm_reconciler; -pub(crate) mod dump_setup; pub(crate) mod hardware_monitor; mod http_entrypoints; mod instance; diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs index 11883adcd24..626d81d54ff 100644 --- a/sled-agent/src/storage_monitor.rs +++ b/sled-agent/src/storage_monitor.rs @@ -6,12 +6,13 @@ //! and dispatches them to other parts of the bootstrap agent and sled agent //! code. -use crate::dump_setup::DumpSetup; use omicron_common::api::external::Generation; +use sled_agent_config_reconciler::dump_setup::DumpSetup; use sled_storage::config::MountConfig; use sled_storage::manager::StorageHandle; use sled_storage::resources::AllDisks; use slog::Logger; +use std::sync::Arc; use tokio::sync::watch; #[derive(thiserror::Error, Debug)] @@ -74,7 +75,7 @@ impl StorageMonitor { mount_config: MountConfig, storage_manager: StorageHandle, ) -> (StorageMonitor, StorageMonitorHandle) { - let dump_setup = DumpSetup::new(&log, mount_config); + let dump_setup = DumpSetup::new(&log, Arc::new(mount_config)); let log = log.new(o!("component" => "StorageMonitor")); let (tx, rx) = watch::channel(StorageMonitorStatus::new()); ( diff --git a/sled-agent/src/support_bundle/storage.rs b/sled-agent/src/support_bundle/storage.rs index 0592804690f..9101e090725 100644 --- a/sled-agent/src/support_bundle/storage.rs +++ b/sled-agent/src/support_bundle/storage.rs @@ -28,6 +28,8 @@ use range_requests::PotentialRange; use range_requests::SingleRange; use sha2::{Digest, Sha256}; use sled_agent_api::*; +use sled_agent_types::support_bundle::BUNDLE_FILE_NAME; +use sled_agent_types::support_bundle::BUNDLE_TMP_FILE_NAME_SUFFIX; use sled_storage::manager::NestedDatasetConfig; use sled_storage::manager::NestedDatasetListOptions; use sled_storage::manager::NestedDatasetLocation; @@ -43,21 +45,6 @@ use tokio_util::io::ReaderStream; use tufaceous_artifact::ArtifactHash; use zip::result::ZipError; -// The final name of the bundle, as it is stored within the dedicated -// datasets. -// -// The full path is of the form: -// -// /pool/ext/$(POOL_UUID)/crypt/$(DATASET_TYPE)/$(BUNDLE_UUID)/bundle.zip -// | | This is a per-bundle nested dataset -// | This is a Debug dataset -// -// NOTE: The "DumpSetupWorker" has been explicitly configured to ignore these files, so they are -// not removed. If the files used here change in the future, DumpSetupWorker should also be -// updated. 
-pub const BUNDLE_FILE_NAME: &str = "bundle.zip"; -pub const BUNDLE_TMP_FILE_NAME_SUFFIX: &str = "bundle.zip.tmp"; - #[derive(thiserror::Error, Debug)] pub enum Error { #[error(transparent)] diff --git a/sled-agent/types/src/lib.rs b/sled-agent/types/src/lib.rs index 47e1535aded..cb95950fd50 100644 --- a/sled-agent/types/src/lib.rs +++ b/sled-agent/types/src/lib.rs @@ -13,5 +13,6 @@ pub mod instance; pub mod rack_init; pub mod rack_ops; pub mod sled; +pub mod support_bundle; pub mod time_sync; pub mod zone_bundle; diff --git a/sled-agent/types/src/support_bundle.rs b/sled-agent/types/src/support_bundle.rs new file mode 100644 index 00000000000..2cf6916834a --- /dev/null +++ b/sled-agent/types/src/support_bundle.rs @@ -0,0 +1,20 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types related to support bundles. + +// The final name of the bundle, as it is stored within the dedicated +// datasets. +// +// The full path is of the form: +// +// /pool/ext/$(POOL_UUID)/crypt/$(DATASET_TYPE)/$(BUNDLE_UUID)/bundle.zip +// | | This is a per-bundle nested dataset +// | This is a Debug dataset +// +// NOTE: The "DumpSetupWorker" has been explicitly configured to ignore these files, so they are +// not removed. If the files used here change in the future, DumpSetupWorker should also be +// updated. +pub const BUNDLE_FILE_NAME: &str = "bundle.zip"; +pub const BUNDLE_TMP_FILE_NAME_SUFFIX: &str = "bundle.zip.tmp"; From 319465ee1f7b4cbdf24e5fce4639959bf9696316 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 29 Apr 2025 13:31:21 -0400 Subject: [PATCH 2/8] hakari --- Cargo.lock | 1 + sled-agent/config-reconciler/Cargo.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 6fc4d9910c1..245d0a5b830 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11494,6 +11494,7 @@ dependencies = [ "omicron-common", "omicron-test-utils", "omicron-uuid-kinds", + "omicron-workspace-hack", "sled-agent-api", "sled-agent-types", "sled-hardware", diff --git a/sled-agent/config-reconciler/Cargo.toml b/sled-agent/config-reconciler/Cargo.toml index c9079ffff2e..617924376bd 100644 --- a/sled-agent/config-reconciler/Cargo.toml +++ b/sled-agent/config-reconciler/Cargo.toml @@ -32,6 +32,7 @@ thiserror.workspace = true tokio.workspace = true tufaceous-artifact.workspace = true zone.workspace = true +omicron-workspace-hack.workspace = true [dev-dependencies] omicron-test-utils.workspace = true From 01ebe837d93dbb9a2a5596c53d7df0867c341646 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 30 Apr 2025 10:22:04 -0400 Subject: [PATCH 3/8] add clarifying comments --- sled-agent/config-reconciler/src/ledger.rs | 15 +++++++++++++++ .../src/sled_agent_facilities.rs | 8 ++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/sled-agent/config-reconciler/src/ledger.rs b/sled-agent/config-reconciler/src/ledger.rs index 9a55c476b1f..40f56420da5 100644 --- a/sled-agent/config-reconciler/src/ledger.rs +++ b/sled-agent/config-reconciler/src/ledger.rs @@ -141,6 +141,21 @@ impl LedgerTaskHandle { unimplemented!() } + /// Confirm that a new [`ArtifactConfig`] is valid given the contents of the + /// currently-ledgered [`OmicronSledConfig`]. + /// + /// In particular, this confirms that a new artifact config does not + /// _remove_ any artifacts needed by zones described by the current sled + /// config. 
+ /// + /// The artifact store in sled agent and this task need to coordinate with + /// each other whenever changes are made to either kind of config. This + /// method provides a path for the artifact store to validate its incoming + /// artifact configs against this task, and this task uses the + /// implementation of [`SledAgentArtifactStore`] to validate its incoming + /// sled configs against the artifact store. Validation is always performed + /// by this task, which enforces serialization of the checks in the event of + /// requests arriving concurrently to change both configs. pub async fn validate_artifact_config( &self, _new_config: ArtifactConfig, diff --git a/sled-agent/config-reconciler/src/sled_agent_facilities.rs b/sled-agent/config-reconciler/src/sled_agent_facilities.rs index 36dc0c47983..40aa466bce6 100644 --- a/sled-agent/config-reconciler/src/sled_agent_facilities.rs +++ b/sled-agent/config-reconciler/src/sled_agent_facilities.rs @@ -20,8 +20,12 @@ pub trait SledAgentFacilities: Send + 'static { type MetricsUntrackZoneLinksError; type ZoneBundleCreateError; - /// Called once time is synchronized. - // TODO-cleanup should we do this work ourselves instead? + /// Called by the reconciler task to inform sled-agent that time is + /// sychronized. May be called multiple times. + // TODO-cleanup should we do this work ourselves instead? This is + // currently implemented by `ServiceManager` and does a couple one-time + // setup things (like rewrite the OS boot time). We could probably absorb + // that work and remove this callback. async fn on_time_sync(&self); /// Method to start a zone. From 608905a2e454ef7c54a14ee6c226f218aaa29ec2 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 30 Apr 2025 10:48:25 -0400 Subject: [PATCH 4/8] be less clever about internal disk sorting (explicit ID type) --- Cargo.lock | 2 + sled-agent/config-reconciler/Cargo.toml | 2 + .../config-reconciler/src/internal_disks.rs | 93 +++++++++++++++++-- 3 files changed, 90 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 245d0a5b830..95372c2de50 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11495,12 +11495,14 @@ dependencies = [ "omicron-test-utils", "omicron-uuid-kinds", "omicron-workspace-hack", + "proptest", "sled-agent-api", "sled-agent-types", "sled-hardware", "sled-storage", "slog", "slog-error-chain", + "test-strategy", "thiserror 1.0.69", "tokio", "tufaceous-artifact", diff --git a/sled-agent/config-reconciler/Cargo.toml b/sled-agent/config-reconciler/Cargo.toml index 617924376bd..88ff181650e 100644 --- a/sled-agent/config-reconciler/Cargo.toml +++ b/sled-agent/config-reconciler/Cargo.toml @@ -36,6 +36,8 @@ omicron-workspace-hack.workspace = true [dev-dependencies] omicron-test-utils.workspace = true +proptest.workspace = true +test-strategy.workspace = true [features] testing = [] diff --git a/sled-agent/config-reconciler/src/internal_disks.rs b/sled-agent/config-reconciler/src/internal_disks.rs index c63feff4441..248c17084b1 100644 --- a/sled-agent/config-reconciler/src/internal_disks.rs +++ b/sled-agent/config-reconciler/src/internal_disks.rs @@ -10,6 +10,7 @@ //! 
* During sled-agent startup, before trust quorum has unlocked
 
 use camino::Utf8PathBuf;
+use core::cmp;
 use id_map::IdMap;
 use id_map::IdMappable;
 use omicron_common::disk::DiskIdentity;
@@ -252,15 +253,13 @@ struct InternalDiskDetails {
 }
 
 impl IdMappable for InternalDiskDetails {
-    type Id = (bool, Arc<DiskIdentity>);
+    type Id = InternalDiskDetailsId;
 
     fn id(&self) -> Self::Id {
-        // Using `!self.is_boot_disk` as the first part of the ID allows us to
-        // guarantee we always iterate over the internal disks with the current
-        // boot disk first.
-        //
-        // TODO-cleanup add tests for this
-        (!self.is_boot_disk, Arc::clone(&self.identity))
+        InternalDiskDetailsId {
+            identity: Arc::clone(&self.identity),
+            is_boot_disk: self.is_boot_disk,
+        }
     }
 }
 
@@ -291,6 +290,36 @@ impl From<&'_ Disk> for InternalDiskDetails {
     }
 }
 
+// Special ID type for `InternalDiskDetails` that lets us guarantee we sort boot
+// disks ahead of non-boot disks. There's a whole set of thorny problems here
+// about what to do if we think both internal disks should have the same
+// contents but they disagree; we'll at least try to have callers prefer the
+// boot disk if they're performing a "check the first disk that succeeds" kind
+// of operation.
+#[derive(Debug, Clone, PartialEq, Eq)]
+struct InternalDiskDetailsId {
+    is_boot_disk: bool,
+    identity: Arc<DiskIdentity>,
+}
+
+impl cmp::Ord for InternalDiskDetailsId {
+    fn cmp(&self, other: &Self) -> cmp::Ordering {
+        match self.is_boot_disk.cmp(&other.is_boot_disk) {
+            cmp::Ordering::Equal => {}
+            // `false` normally sorts before `true`, so intentionally reverse
+            // this so that we sort boot disks ahead of non-boot disks.
+            ord => return ord.reverse(),
+        }
+        self.identity.cmp(&other.identity)
+    }
+}
+
+impl cmp::PartialOrd for InternalDiskDetailsId {
+    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
 struct InternalDisksTask {
     disks_tx: watch::Sender<Arc<IdMap<InternalDiskDetails>>>,
     raw_disks_rx: watch::Receiver<IdMap<RawDiskWithId>>,
@@ -307,3 +336,53 @@ impl InternalDisksTask {
         unimplemented!()
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use omicron_uuid_kinds::ZpoolUuid;
+    use proptest::sample::size_range;
+    use test_strategy::Arbitrary;
+    use test_strategy::proptest;
+
+    #[derive(Debug, Arbitrary)]
+    struct ArbitraryInternalDiskDetailsId {
+        is_boot_disk: bool,
+        vendor: String,
+        model: String,
+        serial: String,
+    }
+
+    #[proptest]
+    fn boot_disks_sort_ahead_of_non_boot_disks_in_id_map(
+        #[any(size_range(2..4).lift())] values: Vec<
+            ArbitraryInternalDiskDetailsId,
+        >,
+    ) {
+        let disk_map: IdMap<_> = values
+            .into_iter()
+            .map(|value| InternalDiskDetails {
+                identity: Arc::new(DiskIdentity {
+                    vendor: value.vendor,
+                    model: value.model,
+                    serial: value.serial,
+                }),
+                zpool_name: ZpoolName::new_internal(ZpoolUuid::new_v4()),
+                is_boot_disk: value.is_boot_disk,
+                slot: None,
+                raw_devfs_path: None,
+            })
+            .collect();
+
+        // When iterating over the contents, we should never see a boot disk
+        // after a non-boot disk.
+ let mut saw_non_boot_disk = false; + for disk in disk_map.iter() { + if disk.is_boot_disk { + assert!(!saw_non_boot_disk); + } else { + saw_non_boot_disk = true; + } + } + } +} From a41323acb4fe34ebb645f306ed8d3bd54b25884b Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 30 Apr 2025 14:39:55 -0400 Subject: [PATCH 5/8] typo --- Cargo.lock | 2 +- sled-agent/config-reconciler/src/handle.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 88a33f9a0e9..b2781ed7d34 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11429,7 +11429,7 @@ dependencies = [ "camino-tempfile", "chrono", "derive_more", - "dropshot 0.16.0", + "dropshot", "glob", "id-map", "illumos-utils", diff --git a/sled-agent/config-reconciler/src/handle.rs b/sled-agent/config-reconciler/src/handle.rs index 272f53a8831..41d1a4f16c4 100644 --- a/sled-agent/config-reconciler/src/handle.rs +++ b/sled-agent/config-reconciler/src/handle.rs @@ -84,7 +84,7 @@ impl ConfigReconcilerHandle { /// /// The config reconciler subsystem splits initialization into two phases: /// the main reconcilation task will not be spawned until - /// `spawn_reconciliation_task()` is called on the return handle. + /// `spawn_reconciliation_task()` is called on the returned handle. /// `spawn_reconciliation_task()` cannot be called by sled-agent proper /// until rack setup has occurred (or sled-agent has found its config from a /// prior rack setup, during a cold boot). From 3f84a04808c5d23cbd8fe0ee26c799d1a979c14b Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 30 Apr 2025 14:46:38 -0400 Subject: [PATCH 6/8] rework call-once-ness of spawn_reconciliation_task --- sled-agent/config-reconciler/src/handle.rs | 93 +++++++++++----------- sled-agent/config-reconciler/src/lib.rs | 1 + 2 files changed, 48 insertions(+), 46 deletions(-) diff --git a/sled-agent/config-reconciler/src/handle.rs b/sled-agent/config-reconciler/src/handle.rs index 41d1a4f16c4..28d22bfaa2c 100644 --- a/sled-agent/config-reconciler/src/handle.rs +++ b/sled-agent/config-reconciler/src/handle.rs @@ -17,9 +17,7 @@ use sled_storage::manager::NestedDatasetConfig; use sled_storage::manager::NestedDatasetListOptions; use sled_storage::manager::NestedDatasetLocation; use slog::Logger; -use slog::warn; use std::sync::Arc; -use std::sync::Mutex; use std::sync::OnceLock; use tokio::sync::watch; @@ -59,6 +57,16 @@ pub enum TimeSyncConfig { Skip, } +#[derive(Debug)] +pub struct ConfigReconcilerSpawnToken { + key_requester: StorageKeyRequester, + time_sync_config: TimeSyncConfig, + reconciler_result_tx: watch::Sender, + currently_managed_zpools_tx: watch::Sender>, + ledger_task_log: Logger, + reconciler_task_log: Logger, +} + #[derive(Debug)] pub struct ConfigReconcilerHandle { raw_disks_tx: RawDisksSender, @@ -67,15 +75,8 @@ pub struct ConfigReconcilerHandle { reconciler_result_rx: watch::Receiver, currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, - // `None` until `spawn_reconciliation_task()` is called. + // Empty until `spawn_reconciliation_task()` is called. ledger_task: OnceLock, - - // We have a two-phase initialization: we get some of our dependencies when - // `Self::new()` is called and the rest when - // `Self::spawn_reconciliation_task()` is called. We hold the dependencies - // that are available in `new` but not needed until - // `spawn_reconciliation_task` in this field. 
- reconciler_task_dependencies: Mutex>, } impl ConfigReconcilerHandle { @@ -93,7 +94,7 @@ impl ConfigReconcilerHandle { key_requester: StorageKeyRequester, time_sync_config: TimeSyncConfig, base_log: &Logger, - ) -> Self { + ) -> (Self, ConfigReconcilerSpawnToken) { let mount_config = Arc::new(mount_config); // Spawn the task that monitors our internal disks (M.2s). @@ -111,14 +112,27 @@ impl ConfigReconcilerHandle { base_log, ); - // Stash the dependencies the reconciler task will need in - // `spawn_reconciliation_task()`. let (reconciler_result_tx, reconciler_result_rx) = watch::channel(ReconcilerResult::default()); let (currently_managed_zpools_tx, currently_managed_zpools_rx) = watch::channel(Arc::default()); - let reconciler_task_dependencies = - Mutex::new(Some(ReconcilerTaskDependencies { + + ( + Self { + raw_disks_tx, + internal_disks_rx, + dataset_task, + ledger_task: OnceLock::new(), + reconciler_result_rx, + currently_managed_zpools_rx: + CurrentlyManagedZpoolsReceiver::new( + currently_managed_zpools_rx, + ), + }, + // Stash the dependencies the reconciler task will need in + // `spawn_reconciliation_task()` inside this token that the caller + // has to hold until it has the other outside dependencies ready. + ConfigReconcilerSpawnToken { key_requester, time_sync_config, reconciler_result_tx, @@ -127,25 +141,19 @@ impl ConfigReconcilerHandle { .new(slog::o!("component" => "SledConfigLedgerTask")), reconciler_task_log: base_log .new(slog::o!("component" => "ConfigReconcilerTask")), - })); - - Self { - raw_disks_tx, - internal_disks_rx, - dataset_task, - ledger_task: OnceLock::new(), - reconciler_result_rx, - currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver::new( - currently_managed_zpools_rx, - ), - reconciler_task_dependencies, - } + }, + ) } /// Spawn the primary config reconciliation task. /// - /// This method can effectively only be called once; any subsequent calls - /// will log a warning and do nothing. + /// This method can effectively only be called once, because the caller must + /// supply the `token` returned by `new()` when this handle was created. + /// + /// # Panics + /// + /// Panics if called multiple times, which is statically impossible outside + /// shenanigans to get a second `ConfigReconcilerHandleSpawnToken`. pub fn spawn_reconciliation_task< T: SledAgentFacilities, U: SledAgentArtifactStore, @@ -154,26 +162,16 @@ impl ConfigReconcilerHandle { underlay_vnic: EtherstubVnic, sled_agent_facilities: T, sled_agent_artifact_store: U, - log: &Logger, + token: ConfigReconcilerSpawnToken, ) { - let ReconcilerTaskDependencies { + let ConfigReconcilerSpawnToken { key_requester, time_sync_config, reconciler_result_tx, currently_managed_zpools_tx, ledger_task_log, reconciler_task_log, - } = match self.reconciler_task_dependencies.lock().unwrap().take() { - Some(dependencies) => dependencies, - None => { - warn!( - log, - "spawn_reconciliation_task() called multiple times \ - (ignored after first call)" - ); - return; - } - }; + } = token; // Spawn the task that manages our config ledger. let (ledger_task, current_config_rx) = @@ -184,10 +182,13 @@ impl ConfigReconcilerHandle { ); match self.ledger_task.set(ledger_task) { Ok(()) => (), - // We know via the `lock().take()` above that this only executes - // once, so `ledger_task` is always set here. + // We can only be called with the `token` we returned in `new()` and + // we document that we panic if called multiple times via some + // multi-token shenanigans. 
Err(_) => { - unreachable!("spawn_reconciliation_task() only executes once") + panic!( + "spawn_reconciliation_task() called with multiple tokens" + ) } } diff --git a/sled-agent/config-reconciler/src/lib.rs b/sled-agent/config-reconciler/src/lib.rs index 01ab8e0ed39..861bca3d7c2 100644 --- a/sled-agent/config-reconciler/src/lib.rs +++ b/sled-agent/config-reconciler/src/lib.rs @@ -65,6 +65,7 @@ pub mod dump_setup; pub use dataset_serialization_task::DatasetTaskError; pub use handle::AvailableDatasetsReceiver; pub use handle::ConfigReconcilerHandle; +pub use handle::ConfigReconcilerSpawnToken; pub use handle::ReconcilerInventory; pub use handle::TimeSyncConfig; pub use internal_disks::InternalDisks; From 996a1026f86afc1f57b6ccc7713606c6e62ab557 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 30 Apr 2025 15:20:16 -0400 Subject: [PATCH 7/8] typo --- sled-agent/config-reconciler/src/handle.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-agent/config-reconciler/src/handle.rs b/sled-agent/config-reconciler/src/handle.rs index 28d22bfaa2c..cf435d7e0de 100644 --- a/sled-agent/config-reconciler/src/handle.rs +++ b/sled-agent/config-reconciler/src/handle.rs @@ -153,7 +153,7 @@ impl ConfigReconcilerHandle { /// # Panics /// /// Panics if called multiple times, which is statically impossible outside - /// shenanigans to get a second `ConfigReconcilerHandleSpawnToken`. + /// shenanigans to get a second [`ConfigReconcilerSpawnToken`]. pub fn spawn_reconciliation_task< T: SledAgentFacilities, U: SledAgentArtifactStore, From 16a4c5675d86bcda0a448e8ffdda0b327cdeba13 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 30 Apr 2025 16:06:45 -0400 Subject: [PATCH 8/8] make trait futures Send --- .../src/sled_agent_facilities.rs | 25 +++++++------------ 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/sled-agent/config-reconciler/src/sled_agent_facilities.rs b/sled-agent/config-reconciler/src/sled_agent_facilities.rs index 40aa466bce6..8f5858056ed 100644 --- a/sled-agent/config-reconciler/src/sled_agent_facilities.rs +++ b/sled-agent/config-reconciler/src/sled_agent_facilities.rs @@ -12,40 +12,36 @@ use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; use sled_agent_types::zone_bundle::ZoneBundleCause; use sled_storage::config::MountConfig; +use std::future::Future; use tufaceous_artifact::ArtifactHash; -#[allow(async_fn_in_trait)] // TODO confirm this makes sense and explain pub trait SledAgentFacilities: Send + 'static { - type StartZoneError; - type MetricsUntrackZoneLinksError; - type ZoneBundleCreateError; - /// Called by the reconciler task to inform sled-agent that time is /// sychronized. May be called multiple times. // TODO-cleanup should we do this work ourselves instead? This is // currently implemented by `ServiceManager` and does a couple one-time // setup things (like rewrite the OS boot time). We could probably absorb // that work and remove this callback. - async fn on_time_sync(&self); + fn on_time_sync(&self) -> impl Future + Send; /// Method to start a zone. // TODO-cleanup This is implemented by // `ServiceManager::start_omicron_zone()`, which does too much; we should // absorb some of its functionality and shrink this interface. We definitely // should not need to pass the full list of U2 zpools. 
-    async fn start_omicron_zone(
+    fn start_omicron_zone(
         &self,
         zone_config: &OmicronZoneConfig,
         mount_config: &MountConfig,
         is_time_synchronized: bool,
         all_u2_pools: &[ZpoolName],
-    ) -> Result<RunningZone, Self::StartZoneError>;
+    ) -> impl Future<Output = anyhow::Result<RunningZone>> + Send;
 
     /// Stop tracking metrics for a zone's datalinks.
     fn metrics_untrack_zone_links(
         &self,
         zone: &RunningZone,
-    ) -> Result<(), Self::MetricsUntrackZoneLinksError>;
+    ) -> anyhow::Result<()>;
 
     /// Instruct DDM to start advertising a prefix.
     fn ddm_add_internal_dns_prefix(&self, prefix: Ipv6Subnet<SLED_PREFIX>);
@@ -54,20 +50,17 @@ pub trait SledAgentFacilities: Send + 'static {
     fn ddm_remove_internal_dns_prefix(&self, prefix: Ipv6Subnet<SLED_PREFIX>);
 
     /// Create a zone bundle.
-    async fn zone_bundle_create(
+    fn zone_bundle_create(
         &self,
         zone: &RunningZone,
         cause: ZoneBundleCause,
-    ) -> Result<(), Self::ZoneBundleCreateError>;
+    ) -> impl Future<Output = anyhow::Result<()>> + Send;
 }
 
-#[allow(async_fn_in_trait)] // TODO confirm this makes sense and explain
 pub trait SledAgentArtifactStore: Send + 'static {
-    type ArtifactExistsValidationError;
-
     /// Check an artifact exists in the TUF Repo Depot storage.
-    async fn validate_artifact_exists_in_storage(
+    fn validate_artifact_exists_in_storage(
         &self,
         artifact: ArtifactHash,
-    ) -> Result<(), Self::ArtifactExistsValidationError>;
+    ) -> impl Future<Output = anyhow::Result<()>> + Send;
 }
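Usage sketch for the [PATCH 8/8] trait shape: once the associated error types are gone, an implementation of `SledAgentArtifactStore` only has to return a `Send` future yielding `anyhow::Result<()>`. The sketch below is hypothetical and not part of this series; the `InMemoryArtifactStore` type and the crate-root re-export of the trait are assumptions for illustration only.

use std::collections::BTreeSet;
use std::future::Future;

use anyhow::bail;
// Assumption: the trait is re-exported at the crate root, as other items are
// in lib.rs; adjust the path if it is only reachable via the module.
use sled_agent_config_reconciler::SledAgentArtifactStore;
use tufaceous_artifact::ArtifactHash;

/// Toy artifact store backed by an in-memory set of hashes (e.g., for tests).
struct InMemoryArtifactStore {
    artifacts: BTreeSet<ArtifactHash>,
}

impl SledAgentArtifactStore for InMemoryArtifactStore {
    fn validate_artifact_exists_in_storage(
        &self,
        artifact: ArtifactHash,
    ) -> impl Future<Output = anyhow::Result<()>> + Send {
        // Compute the answer before building the future so the returned
        // future captures only a `bool` and is trivially `Send`.
        let found = self.artifacts.contains(&artifact);
        async move {
            if found {
                Ok(())
            } else {
                bail!("artifact not present in local TUF repo depot storage")
            }
        }
    }
}

Capturing the lookup result up front, rather than borrowing `self` inside the async block, is one simple way to satisfy the `Send` bound without adding extra bounds on the store type.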