diff --git a/io-engine/src/bdev/nexus/nexus_persistence.rs b/io-engine/src/bdev/nexus/nexus_persistence.rs index 7aaf87845..e99d84e1e 100644 --- a/io-engine/src/bdev/nexus/nexus_persistence.rs +++ b/io-engine/src/bdev/nexus/nexus_persistence.rs @@ -103,9 +103,12 @@ impl<'n> Nexus<'n> { }; nexus_info.children.push(child_info); }); - // We started with this child because it was healthy in etcd, or isn't there at all. - // Being unhealthy here means it is undergoing a fault/retire before nexus is open. - if nexus_info.children.len() == 1 && !nexus_info.children[0].healthy { + // We started with this child because it was healthy in etcd, or + // isn't there at all. Being unhealthy here + // means it is undergoing a fault/retire before nexus is open. + if nexus_info.children.len() == 1 + && !nexus_info.children[0].healthy + { warn!("{self:?} Not persisting: the only child went unhealthy during nexus creation"); return Err(Error::NexusCreate { name: self.name.clone(), @@ -211,6 +214,7 @@ impl<'n> Nexus<'n> { }; let mut retry = PersistentStore::retries(); + let mut logged = false; loop { let Err(err) = PersistentStore::put(&key, &info.inner).await else { trace!(?key, "{self:?}: the state was saved successfully"); @@ -225,10 +229,13 @@ impl<'n> Nexus<'n> { }); } - error!( - "{self:?}: failed to persist nexus information, \ - will retry ({retry} left): {err}" - ); + if !logged { + error!( + "{self:?}: failed to persist nexus information, \ + will silently retry ({retry} left): {err}" + ); + logged = true; + } // Allow some time for the connection to the persistent // store to be re-established before retrying the operation. diff --git a/io-engine/src/core/env.rs b/io-engine/src/core/env.rs index 3c88c1823..8f3490201 100644 --- a/io-engine/src/core/env.rs +++ b/io-engine/src/core/env.rs @@ -174,7 +174,7 @@ pub struct MayastorCliArgs { pub ps_timeout: Duration, #[clap(long = "ps-retries", default_value = "30")] /// Persistent store operation retries. - pub ps_retries: u8, + pub ps_retries: u16, #[clap(long = "bdev-pool-size", default_value = "65535")] /// Number of entries in memory pool for bdev I/O contexts pub bdev_io_ctx_pool_size: u64, @@ -374,7 +374,7 @@ pub struct MayastorEnvironment { pub registration_endpoint: Option, ps_endpoint: Option, ps_timeout: Duration, - ps_retries: u8, + ps_retries: u16, mayastor_config: Option, ptpl_dir: Option, pool_config: Option, diff --git a/io-engine/src/persistent_store.rs b/io-engine/src/persistent_store.rs index 8f5f334bc..0743470ae 100644 --- a/io-engine/src/persistent_store.rs +++ b/io-engine/src/persistent_store.rs @@ -31,7 +31,7 @@ pub struct PersistentStoreBuilder { /// Operation timeout. timeout: Duration, /// Number of operation retries. - retries: u8, + retries: u16, } impl Default for PersistentStoreBuilder { @@ -74,7 +74,7 @@ impl PersistentStoreBuilder { } /// Sets number of operation retries. - pub fn with_retries(mut self, retries: u8) -> Self { + pub fn with_retries(mut self, retries: u16) -> Self { self.retries = retries; self } @@ -96,7 +96,7 @@ pub struct PersistentStore { /// Operation timeout. timeout: Duration, /// Number of operation retries. - retries: u8, + retries: u16, } /// Persistent store global instance. @@ -311,7 +311,7 @@ impl PersistentStore { } /// Gets the number of operation retries. - pub fn retries() -> u8 { + pub fn retries() -> u16 { Self::instance().lock().retries }