Skip to content

Commit

Permalink
dispatch all manfiest uploads via store_tenant_manifest
Browse files Browse the repository at this point in the history
  • Loading branch information
jcsp committed Oct 29, 2024
1 parent fcf2a6d commit c01aa89
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 69 deletions.
41 changes: 15 additions & 26 deletions pageserver/src/tenant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,9 @@ pub struct Tenant {
/// **Lock order**: if acquiring all (or a subset), acquire them in order `timelines`, `timelines_offloaded`, `timelines_creating`
timelines_offloaded: Mutex<HashMap<TimelineId, Arc<OffloadedTimeline>>>,

/// Serialize writes of the tenant manifest to remote storage
/// Serialize writes of the tenant manifest to remote storage. If there are concurrent operations
/// affecting the manifest, such as timeline deletion and timeline offload, they must wait for
/// each other (this could be optimized to coalesce writes if necessary).
tenant_manifest_upload: tokio::sync::Mutex<()>,

// This mutex prevents creation of new timelines during GC.
Expand Down Expand Up @@ -761,6 +763,15 @@ pub(crate) enum TenantManifestError {
Cancelled,
}

impl From<TenantManifestError> for TimelineArchivalError {
fn from(e: TenantManifestError) -> Self {
match e {
TenantManifestError::RemoteStorage(e) => Self::Other(e),
TenantManifestError::Cancelled => Self::Cancelled,
}
}
}

impl Debug for TimelineArchivalError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Expand Down Expand Up @@ -1546,18 +1557,7 @@ impl Tenant {
offloaded_timelines_accessor.extend(offloaded_timelines_list.into_iter());
}
if !offloaded_timeline_ids.is_empty() {
let manifest = self.tenant_manifest();
// TODO: generation support
let generation = remote_timeline_client::TENANT_MANIFEST_GENERATION;
upload_tenant_manifest(
&self.remote_storage,
&self.tenant_shard_id,
generation,
&manifest,
&self.cancel,
)
.await
.map_err(TimelineArchivalError::Other)?;
self.store_tenant_manifest().await?;
}

// The local filesystem contents are a cache of what's in the remote IndexPart;
Expand Down Expand Up @@ -1926,18 +1926,7 @@ impl Tenant {
};

// Upload new list of offloaded timelines to S3
let manifest = self.tenant_manifest();
// TODO: generation support
let generation = remote_timeline_client::TENANT_MANIFEST_GENERATION;
upload_tenant_manifest(
&self.remote_storage,
&self.tenant_shard_id,
generation,
&manifest,
&cancel,
)
.await
.map_err(TimelineArchivalError::Other)?;
self.store_tenant_manifest().await?;

// Activate the timeline (if it makes sense)
if !(timeline.is_broken() || timeline.is_stopping()) {
Expand Down Expand Up @@ -3329,7 +3318,7 @@ impl Tenant {
.unwrap_or(self.conf.default_tenant_conf.lsn_lease_length)
}

pub(crate) fn tenant_manifest(&self) -> TenantManifest {
fn tenant_manifest(&self) -> TenantManifest {
let timelines_offloaded = self.timelines_offloaded.lock().unwrap();

let mut timeline_manifests = timelines_offloaded
Expand Down
2 changes: 1 addition & 1 deletion pageserver/src/tenant/remote_timeline_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ pub(crate) use download::{
list_remote_tenant_shards, list_remote_timelines,
};
pub(crate) use index::LayerFileMetadata;
pub(crate) use upload::{upload_initdb_dir, upload_tenant_manifest};
pub(crate) use upload::upload_initdb_dir;

// Occasional network issues and such can cause remote operations to fail, and
// that's expected. If a download fails, we log it at info-level, and retry.
Expand Down
40 changes: 10 additions & 30 deletions pageserver/src/tenant/timeline/delete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@ use crate::{
task_mgr::{self, TaskKind},
tenant::{
metadata::TimelineMetadata,
remote_timeline_client::{
self, PersistIndexPartWithDeletedFlagError, RemoteTimelineClient,
},
remote_timeline_client::{PersistIndexPartWithDeletedFlagError, RemoteTimelineClient},
CreateTimelineCause, DeleteTimelineError, Tenant, TimelineOrOffloaded,
},
};
Expand Down Expand Up @@ -176,32 +174,6 @@ async fn remove_maybe_offloaded_timeline_from_tenant(
Ok(())
}

/// It is important that this gets called when DeletionGuard is being held.
/// For more context see comments in [`DeleteTimelineFlow::prepare`]
async fn upload_new_tenant_manifest(
tenant: &Tenant,
_: &DeletionGuard, // using it as a witness
) -> anyhow::Result<()> {
// This is susceptible to race conditions, i.e. we won't continue deletions if there is a crash
// between the deletion of the index-part.json and reaching of this code.
// So indeed, the tenant manifest might refer to an offloaded timeline which has already been deleted.
// However, we handle this case in tenant loading code so the next time we attach, the issue is
// resolved.
let manifest = tenant.tenant_manifest();
// TODO: generation support
let generation = remote_timeline_client::TENANT_MANIFEST_GENERATION;
remote_timeline_client::upload_tenant_manifest(
&tenant.remote_storage,
&tenant.tenant_shard_id,
generation,
&manifest,
&tenant.cancel,
)
.await?;

Ok(())
}

/// Orchestrates timeline shut down of all timeline tasks, removes its in-memory structures,
/// and deletes its data from both disk and s3.
/// The sequence of steps:
Expand Down Expand Up @@ -455,7 +427,15 @@ impl DeleteTimelineFlow {

remove_maybe_offloaded_timeline_from_tenant(tenant, timeline, &guard).await?;

upload_new_tenant_manifest(tenant, &guard).await?;
// This is susceptible to race conditions, i.e. we won't continue deletions if there is a crash
// between the deletion of the index-part.json and reaching of this code.
// So indeed, the tenant manifest might refer to an offloaded timeline which has already been deleted.
// However, we handle this case in tenant loading code so the next time we attach, the issue is
// resolved.
tenant
.store_tenant_manifest()
.await
.map_err(|e| DeleteTimelineError::Other(anyhow::anyhow!(e)))?;

*guard = Self::Finished;

Expand Down
17 changes: 5 additions & 12 deletions pageserver/src/tenant/timeline/offload.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::sync::Arc;
use super::delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard};
use super::Timeline;
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
use crate::tenant::{remote_timeline_client, OffloadedTimeline, Tenant, TimelineOrOffloaded};
use crate::tenant::{OffloadedTimeline, Tenant, TimelineOrOffloaded};

pub(crate) async fn offload_timeline(
tenant: &Tenant,
Expand Down Expand Up @@ -63,17 +63,10 @@ pub(crate) async fn offload_timeline(
// at the next restart attach it again.
// For that to happen, we'd need to make the manifest reflect our *intended* state,
// not our actual state of offloaded timelines.
let manifest = tenant.tenant_manifest();
// TODO: generation support
let generation = remote_timeline_client::TENANT_MANIFEST_GENERATION;
remote_timeline_client::upload_tenant_manifest(
&tenant.remote_storage,
&tenant.tenant_shard_id,
generation,
&manifest,
&tenant.cancel,
)
.await?;
tenant
.store_tenant_manifest()
.await
.map_err(|e| anyhow::anyhow!(e))?;

Ok(())
}
Expand Down

0 comments on commit c01aa89

Please sign in to comment.