Support copying layers in detach_ancestor from before shard splits (#9669)

We need to use the shard index associated with the layer file, not the one associated with our current tenant shard ID.

Due to shard splits, a layer file may have been uploaded under an older (pre-split) shard index, so the current shard ID does not necessarily point at the path where the file actually lives in remote storage.

Closes #9667
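
For illustration, here is a toy, self-contained Rust sketch of the failure mode (the ShardIndex struct below is a stand-in, not the pageserver's type): after a split, the shard identity a pageserver currently holds no longer matches the shard index under which older layer files were uploaded, so remote keys must be derived from the layer's own metadata.

#[derive(Debug, PartialEq, Clone, Copy)]
struct ShardIndex {
    number: u8,
    count: u8,
}

fn main() {
    // The layer was uploaded while the tenant had a single shard...
    let layer_metadata_shard = ShardIndex { number: 0, count: 1 };
    // ...but after a split into four shards, the current identity differs.
    let current_shard = ShardIndex { number: 2, count: 4 };

    // Deriving the remote key from the current shard would point at an object
    // that was never uploaded; the key has to come from the layer's metadata.
    assert_ne!(layer_metadata_shard, current_shard);
    println!("copy source must use {layer_metadata_shard:?}, not {current_shard:?}");
}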
arpad-m authored Nov 7, 2024
1 parent 2a95a51 commit 011c0a1
Showing 3 changed files with 49 additions and 12 deletions.
6 changes: 3 additions & 3 deletions pageserver/src/tenant/remote_timeline_client.rs
@@ -1445,7 +1445,7 @@ impl RemoteTimelineClient {
         let remote_path = remote_layer_path(
             &self.tenant_shard_id.tenant_id,
             &self.timeline_id,
-            self.tenant_shard_id.to_index(),
+            uploaded.metadata().shard,
             &uploaded.layer_desc().layer_name(),
             uploaded.metadata().generation,
         );
@@ -1486,15 +1486,15 @@ impl RemoteTimelineClient {
             &adopted
                 .get_timeline_id()
                 .expect("Source timeline should be alive"),
-            self.tenant_shard_id.to_index(),
+            adopted.metadata().shard,
             &adopted.layer_desc().layer_name(),
             adopted.metadata().generation,
         );

         let target_remote_path = remote_layer_path(
             &self.tenant_shard_id.tenant_id,
             &self.timeline_id,
-            self.tenant_shard_id.to_index(),
+            adopted_as.metadata().shard,
             &adopted_as.layer_desc().layer_name(),
             adopted_as.metadata().generation,
         );
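
The reason the shard index matters here is that it is part of the remote key a layer is stored under. A minimal sketch of that idea follows; the helper name and the exact key layout are assumptions for illustration, not the real remote_layer_path.

// Illustrative only: assumes the shard index is baked into the tenant prefix
// of the remote key, so a layer uploaded before a shard split lives under the
// old shard's prefix, not the current one.
fn remote_layer_path_sketch(
    tenant_id: &str,
    timeline_id: &str,
    shard_suffix: &str,      // e.g. "" for unsharded, a "-0104" style suffix when sharded
    layer_name: &str,
    generation_suffix: &str, // e.g. "-00000002"
) -> String {
    format!("tenants/{tenant_id}{shard_suffix}/timelines/{timeline_id}/{layer_name}{generation_suffix}")
}

fn main() {
    let key = remote_layer_path_sketch("acme", "tl1", "-0104", "layer_name", "-00000002");
    println!("{key}");
}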
14 changes: 11 additions & 3 deletions pageserver/src/tenant/timeline/detach_ancestor.rs
@@ -12,7 +12,7 @@ use crate::{
     virtual_file::{MaybeFatalIo, VirtualFile},
 };
 use anyhow::Context;
-use pageserver_api::models::detach_ancestor::AncestorDetached;
+use pageserver_api::{models::detach_ancestor::AncestorDetached, shard::ShardIdentity};
 use tokio::sync::Semaphore;
 use tokio_util::sync::CancellationToken;
 use tracing::Instrument;
@@ -376,8 +376,14 @@ pub(super) async fn prepare(
         tasks.spawn(
             async move {
                 let _permit = limiter.acquire().await;
-                let owned =
-                    remote_copy(&adopted, &timeline, timeline.generation, &timeline.cancel).await?;
+                let owned = remote_copy(
+                    &adopted,
+                    &timeline,
+                    timeline.generation,
+                    timeline.shard_identity,
+                    &timeline.cancel,
+                )
+                .await?;
                 tracing::info!(layer=%owned, "remote copied");
                 Ok(owned)
             }
@@ -629,13 +635,15 @@ async fn remote_copy(
     adopted: &Layer,
     adoptee: &Arc<Timeline>,
     generation: Generation,
+    shard_identity: ShardIdentity,
     cancel: &CancellationToken,
 ) -> Result<Layer, Error> {
     // depending if Layer::keep_resident we could hardlink

     let mut metadata = adopted.metadata();
     debug_assert!(metadata.generation <= generation);
     metadata.generation = generation;
+    metadata.shard = shard_identity.shard_index();

     let owned = crate::tenant::storage_layer::Layer::for_evicted(
         adoptee.conf,
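
To summarize what the new shard_identity parameter buys: remote_copy computes the copy source from the adopted layer's original metadata (possibly a pre-split shard and an older generation), then rewrites the metadata to the adoptee's generation and shard index before computing the target. A rough, self-contained sketch with toy types and a placeholder key format (neither is the pageserver's real API):

#[derive(Clone, Copy, Debug)]
struct Meta {
    shard: u8,       // stand-in for ShardIndex
    generation: u32, // stand-in for Generation
}

fn layer_key(shard: u8, name: &str, generation: u32) -> String {
    // Placeholder key format; the real remote_layer_path differs.
    format!("shard-{shard}/{name}-{generation:08x}")
}

fn main() {
    let adopted = Meta { shard: 0, generation: 3 }; // uploaded before the split
    let adoptee_generation = 7;
    let adoptee_shard_index = 2; // current shard after the split

    // Source key is derived from the layer's original metadata.
    let source = layer_key(adopted.shard, "layer", adopted.generation);

    // Mirrors `metadata.generation = generation` and
    // `metadata.shard = shard_identity.shard_index()` in remote_copy.
    let mut copied = adopted;
    copied.generation = adoptee_generation;
    copied.shard = adoptee_shard_index;
    let target = layer_key(copied.shard, "layer", copied.generation);

    println!("copy {source} -> {target}");
}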
41 changes: 35 additions & 6 deletions test_runner/regress/test_timeline_detach_ancestor.py
@@ -16,6 +16,7 @@
     NeonEnvBuilder,
     PgBin,
     flush_ep_to_pageserver,
+    last_flush_lsn_upload,
     wait_for_last_flush_lsn,
 )
 from fixtures.pageserver.http import HistoricLayerInfo, PageserverApiException
@@ -576,27 +577,49 @@ def delta_layers(timeline_id: TimelineId):
     assert_pageserver_backups_equal(fullbackup_before, fullbackup_after, set())


-@pytest.mark.parametrize("sharded", [True, False])
+@pytest.mark.parametrize("shards_initial_after", [(1, 1), (2, 2), (1, 4)])
 def test_timeline_ancestor_detach_idempotent_success(
-    neon_env_builder: NeonEnvBuilder, sharded: bool
+    neon_env_builder: NeonEnvBuilder, shards_initial_after: tuple[int, int]
 ):
-    shards = 2 if sharded else 1
+    shards_initial = shards_initial_after[0]
+    shards_after = shards_initial_after[1]

-    neon_env_builder.num_pageservers = shards
-    env = neon_env_builder.init_start(initial_tenant_shard_count=shards if sharded else None)
+    neon_env_builder.num_pageservers = shards_after
+    env = neon_env_builder.init_start(
+        initial_tenant_shard_count=shards_initial if shards_initial > 1 else None,
+        initial_tenant_conf={
+            # small checkpointing and compaction targets to ensure we generate many upload operations
+            "checkpoint_distance": 512 * 1024,
+            "compaction_threshold": 1,
+            "compaction_target_size": 512 * 1024,
+            # disable background compaction and GC. We invoke it manually when we want it to happen.
+            "gc_period": "0s",
+            "compaction_period": "0s",
+        },
+    )

     pageservers = dict((int(p.id), p) for p in env.pageservers)

     for ps in pageservers.values():
         ps.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS)

-    if sharded:
+    if shards_after > 1:
         # FIXME: should this be in the neon_env_builder.init_start?
         env.storage_controller.reconcile_until_idle()
         client = env.storage_controller.pageserver_api()
     else:
         client = env.pageserver.http_client()

+    # Write some data so that we have some layers to copy
+    with env.endpoints.create_start("main", tenant_id=env.initial_tenant) as endpoint:
+        endpoint.safe_psql_many(
+            [
+                "CREATE TABLE foo(key serial primary key, t text default 'data_content')",
+                "INSERT INTO foo SELECT FROM generate_series(1,1024)",
+            ]
+        )
+        last_flush_lsn_upload(env, endpoint, env.initial_tenant, env.initial_timeline)
+
     first_branch = env.create_branch("first_branch")

     _ = env.create_branch("second_branch", ancestor_branch_name="first_branch")
@@ -607,6 +630,12 @@ def test_timeline_ancestor_detach_idempotent_success(
     reparented1 = env.create_branch("first_reparented", ancestor_branch_name="main")
     reparented2 = env.create_branch("second_reparented", ancestor_branch_name="main")

+    if shards_after > shards_initial:
+        # Do a shard split
+        # This is a reproducer for https://github.com/neondatabase/neon/issues/9667
+        env.storage_controller.tenant_shard_split(env.initial_tenant, shards_after)
+        env.storage_controller.reconcile_until_idle()
+
     first_reparenting_response = client.detach_ancestor(env.initial_tenant, first_branch)
     assert set(first_reparenting_response) == {reparented1, reparented2}


1 comment on commit 011c0a1

@github-actions

5428 tests run: 5195 passed, 1 failed, 232 skipped (full report)


Failures on Postgres 16

  • test_basebackup_with_high_slru_count[github-actions-selfhosted-10-13-30]: release-x86-64
# Run all failed tests locally:
scripts/pytest -vv -n $(nproc) -k "test_basebackup_with_high_slru_count[release-pg16-github-actions-selfhosted-10-13-30]"

Code coverage* (full report)

  • functions: 31.7% (7846 of 24761 functions)
  • lines: 49.4% (62084 of 125797 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
011c0a1 at 2024-11-07T02:46:30.071Z :recycle:
