From 7f307511d594be3768033fc93ee15342bc27f913 Mon Sep 17 00:00:00 2001 From: Yifan Hong Date: Mon, 9 Dec 2019 20:04:51 -0800 Subject: [PATCH 1/3] libsnapshot: add GetCurrentSlot Factor out obscure logic that reads boot indicator path. GetCurrentSlot() returns three states: - None (read failure) - Old (before reboot) - New (after reboot) Use these "logical" slot values for simpler code. Test: libsnapshot_test Change-Id: I1fa2ce4916c5a1652d25682ec1f11e101c858822 Former-commit-id: c64cefc027e63d4d35056302f874507887387421 --- .../include/libsnapshot/snapshot.h | 4 + fs_mgr/libsnapshot/snapshot.cpp | 81 ++++++++++--------- 2 files changed, 45 insertions(+), 40 deletions(-) diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h index 5738b9633..eb48a761a 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h @@ -496,6 +496,10 @@ class SnapshotManager final { // as a sanity check. bool EnsureNoOverflowSnapshot(LockedFile* lock); + enum class Slot { Unknown, Source, Target }; + friend std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot); + Slot GetCurrentSlot(); + std::string gsid_dir_; std::string metadata_dir_; std::unique_ptr device_; diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp index f38db4312..ce960f90d 100644 --- a/fs_mgr/libsnapshot/snapshot.cpp +++ b/fs_mgr/libsnapshot/snapshot.cpp @@ -171,14 +171,9 @@ bool SnapshotManager::TryCancelUpdate(bool* needs_merge) { if (state == UpdateState::Unverified) { // We completed an update, but it can still be canceled if we haven't booted into it. - auto boot_file = GetSnapshotBootIndicatorPath(); - std::string contents; - if (!android::base::ReadFileToString(boot_file, &contents)) { - PLOG(WARNING) << "Cannot read " << boot_file << ", proceed to canceling the update:"; - return RemoveAllUpdateState(file.get()); - } - if (device_->GetSlotSuffix() == contents) { - LOG(INFO) << "Canceling a previously completed update"; + auto slot = GetCurrentSlot(); + if (slot != Slot::Target) { + LOG(INFO) << "Canceling previously completed updates (if any)"; return RemoveAllUpdateState(file.get()); } } @@ -186,6 +181,19 @@ bool SnapshotManager::TryCancelUpdate(bool* needs_merge) { return true; } +SnapshotManager::Slot SnapshotManager::GetCurrentSlot() { + auto boot_file = GetSnapshotBootIndicatorPath(); + std::string contents; + if (!android::base::ReadFileToString(boot_file, &contents)) { + PLOG(WARNING) << "Cannot read " << boot_file; + return Slot::Unknown; + } + if (device_->GetSlotSuffix() == contents) { + return Slot::Source; + } + return Slot::Target; +} + bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock) { if (!RemoveAllSnapshots(lock)) { LOG(ERROR) << "Could not remove all snapshots"; @@ -505,15 +513,9 @@ bool SnapshotManager::InitiateMerge() { return false; } - std::string old_slot; - auto boot_file = GetSnapshotBootIndicatorPath(); - if (!android::base::ReadFileToString(boot_file, &old_slot)) { - LOG(ERROR) << "Could not determine the previous slot; aborting merge"; - return false; - } - auto new_slot = device_->GetSlotSuffix(); - if (new_slot == old_slot) { - LOG(ERROR) << "Device cannot merge while booting off old slot " << old_slot; + auto slot = GetCurrentSlot(); + if (slot != Slot::Target) { + LOG(ERROR) << "Device cannot merge while not booting from new slot"; return false; } @@ -1097,13 +1099,11 @@ bool SnapshotManager::CollapseSnapshotDevice(const std::string& name, } bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock) { - std::string old_slot; - auto boot_file = GetSnapshotBootIndicatorPath(); - if (!android::base::ReadFileToString(boot_file, &old_slot)) { - PLOG(ERROR) << "Unable to read the snapshot indicator file: " << boot_file; + auto slot = GetCurrentSlot(); + if (slot == Slot::Unknown) { return false; } - if (device_->GetSlotSuffix() != old_slot) { + if (slot == Slot::Target) { // We're booted into the target slot, which means we just rebooted // after applying the update. if (!HandleCancelledUpdateOnNewSlot(lock)) { @@ -1271,14 +1271,9 @@ bool SnapshotManager::NeedSnapshotsInFirstStageMount() { // ultimately we'll fail to boot. Why not make it a fatal error and have // the reason be clearer? Because the indicator file still exists, and // if this was FATAL, reverting to the old slot would be broken. - std::string old_slot; - auto boot_file = GetSnapshotBootIndicatorPath(); - if (!android::base::ReadFileToString(boot_file, &old_slot)) { - PLOG(ERROR) << "Unable to read the snapshot indicator file: " << boot_file; - return false; - } - if (device_->GetSlotSuffix() == old_slot) { - LOG(INFO) << "Detected slot rollback, will not mount snapshots."; + auto slot = GetCurrentSlot(); + if (slot != Slot::Target) { + LOG(INFO) << "Not booting from new slot. Will not mount snapshots."; return false; } @@ -2156,6 +2151,17 @@ bool SnapshotManager::UnmapAllPartitions() { return ok; } +std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot) { + switch (slot) { + case SnapshotManager::Slot::Unknown: + return os << "unknown"; + case SnapshotManager::Slot::Source: + return os << "source"; + case SnapshotManager::Slot::Target: + return os << "target"; + } +} + bool SnapshotManager::Dump(std::ostream& os) { // Don't actually lock. Dump() is for debugging purposes only, so it is okay // if it is racy. @@ -2166,11 +2172,8 @@ bool SnapshotManager::Dump(std::ostream& os) { ss << "Update state: " << ReadUpdateState(file.get()) << std::endl; - auto boot_file = GetSnapshotBootIndicatorPath(); - std::string boot_indicator; - if (android::base::ReadFileToString(boot_file, &boot_indicator)) { - ss << "Boot indicator: old slot = " << boot_indicator << std::endl; - } + ss << "Current slot: " << device_->GetSlotSuffix() << std::endl; + ss << "Boot indicator: booting from " << GetCurrentSlot() << " slot" << std::endl; bool ok = true; std::vector snapshots; @@ -2283,11 +2286,9 @@ bool SnapshotManager::HandleImminentDataWipe(const std::function& callba // // Since the rollback is inevitable, we don't treat a HAL failure // as an error here. - std::string old_slot; - auto boot_file = GetSnapshotBootIndicatorPath(); - if (android::base::ReadFileToString(boot_file, &old_slot) && - device_->GetSlotSuffix() != old_slot) { - LOG(ERROR) << "Reverting to slot " << old_slot << " since update will be deleted."; + auto slot = GetCurrentSlot(); + if (slot == Slot::Target) { + LOG(ERROR) << "Reverting to old slot since update will be deleted."; device_->SetSlotAsUnbootable(slot_number); } break; From b7eadb051d279098500cc72ec11749f058320bbe Mon Sep 17 00:00:00 2001 From: Yifan Hong Date: Fri, 13 Dec 2019 19:42:52 -0800 Subject: [PATCH 2/3] libsnapshot: SnapshotUpdateTest::AddOperation Reusable test code. Test: libsnapshot_test Change-Id: I00a1a460d84c01b0dbd24b668293df87962e8b00 Former-commit-id: c810f1091a45f35d3f5e1fcbc9ff25848b55353a --- fs_mgr/libsnapshot/snapshot_test.cpp | 50 ++++++++++++++-------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp index 964b21a69..df4eb0b11 100644 --- a/fs_mgr/libsnapshot/snapshot_test.cpp +++ b/fs_mgr/libsnapshot/snapshot_test.cpp @@ -921,6 +921,25 @@ class SnapshotUpdateTest : public SnapshotTest { return AssertionSuccess(); } + // Create fake install operations to grow the COW device size. + void AddOperation(PartitionUpdate* partition_update, uint64_t size_bytes = 0) { + auto e = partition_update->add_operations()->add_dst_extents(); + e->set_start_block(0); + if (size_bytes == 0) { + size_bytes = GetSize(partition_update); + } + e->set_num_blocks(size_bytes / manifest_.block_size()); + } + + void AddOperationForPartitions(std::vector partitions = {}) { + if (partitions.empty()) { + partitions = {sys_, vnd_, prd_}; + } + for (auto* partition : partitions) { + AddOperation(partition); + } + } + std::unique_ptr opener_; DeltaArchiveManifest manifest_; std::unique_ptr src_; @@ -948,12 +967,7 @@ TEST_F(SnapshotUpdateTest, FullUpdateFlow) { SetSize(vnd_, partition_size); SetSize(prd_, partition_size); - // Create fake install operations to grow the COW device size. - for (auto& partition : {sys_, vnd_, prd_}) { - auto e = partition->add_operations()->add_dst_extents(); - e->set_start_block(0); - e->set_num_blocks(GetSize(partition) / manifest_.block_size()); - } + AddOperationForPartitions(); // Execute the update. ASSERT_TRUE(sm->BeginUpdate()); @@ -1089,12 +1103,7 @@ TEST_F(SnapshotUpdateTest, TestRollback) { ASSERT_TRUE(sm->BeginUpdate()); ASSERT_TRUE(sm->UnmapUpdateSnapshot("sys_b")); - // Create fake install operations to grow the COW device size. - for (auto& partition : {sys_, vnd_, prd_}) { - auto e = partition->add_operations()->add_dst_extents(); - e->set_start_block(0); - e->set_num_blocks(GetSize(partition) / manifest_.block_size()); - } + AddOperationForPartitions(); ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_)); @@ -1239,10 +1248,8 @@ TEST_F(SnapshotUpdateTest, RetrofitAfterRegularAb) { group_->set_size(kRetrofitGroupSize); for (auto* partition : {sys_, vnd_, prd_}) { SetSize(partition, 2_MiB); - auto* e = partition->add_operations()->add_dst_extents(); - e->set_start_block(0); - e->set_num_blocks(2_MiB / manifest_.block_size()); } + AddOperationForPartitions(); ASSERT_TRUE(sm->BeginUpdate()); ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_)); @@ -1285,9 +1292,7 @@ TEST_F(SnapshotUpdateTest, MergeCannotRemoveCow) { } // Add operations for sys. The whole device is written. - auto e = sys_->add_operations()->add_dst_extents(); - e->set_start_block(0); - e->set_num_blocks(GetSize(sys_) / manifest_.block_size()); + AddOperation(sys_); // Execute the update. ASSERT_TRUE(sm->BeginUpdate()); @@ -1477,10 +1482,7 @@ TEST_F(SnapshotUpdateTest, Hashtree) { const auto block_size = manifest_.block_size(); SetSize(sys_, partition_size); - - auto e = sys_->add_operations()->add_dst_extents(); - e->set_start_block(0); - e->set_num_blocks(data_size / block_size); + AddOperation(sys_, data_size); // Set hastree extents. sys_->mutable_hash_tree_data_extent()->set_start_block(0); @@ -1525,9 +1527,7 @@ TEST_F(SnapshotUpdateTest, Overflow) { const auto actual_write_size = GetSize(sys_); const auto declared_write_size = actual_write_size - 1_MiB; - auto e = sys_->add_operations()->add_dst_extents(); - e->set_start_block(0); - e->set_num_blocks(declared_write_size / manifest_.block_size()); + AddOperation(sys_, declared_write_size); // Execute the update. ASSERT_TRUE(sm->BeginUpdate()); From be67747729fbdd58f5fdbc1323b11ccd166c888e Mon Sep 17 00:00:00 2001 From: Yifan Hong Date: Fri, 13 Dec 2019 19:28:28 -0800 Subject: [PATCH 3/3] libsnapshot: add WaitForMerge Add an API that does not initiate the merge, but only waits for it to finish. It is different from ProcessUpdateState API in that it also blocks when state == UNVERIFIED and booting from the new slot. (ProcessUpdateState immediately returns in this case). This is useful for android.os.UpdateEngine.CleanupSuccessfulUpdate(). Bug: 138808328 Test: libsnapshot_test Change-Id: I7cc59fcaf69616e7ec7ebe6101991b5106845b65 Former-commit-id: 1af515b57c97ef9ce88334ffab9275df71ad87d0 --- .../include/libsnapshot/snapshot.h | 7 ++++ fs_mgr/libsnapshot/snapshot.cpp | 18 ++++++++- fs_mgr/libsnapshot/snapshot_test.cpp | 39 +++++++++++++++++++ 3 files changed, 63 insertions(+), 1 deletion(-) diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h index eb48a761a..445e6dbed 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h @@ -198,6 +198,13 @@ class SnapshotManager final { // - other states indicating an error has occurred UpdateState InitiateMergeAndWait(); + // Wait for the merge if rebooted into the new slot. Does NOT initiate a + // merge. If the merge has not been initiated (but should be), wait. + // Returns: + // - true there is no merge or merge finishes + // - false indicating an error has occurred + bool WaitForMerge(); + // Find the status of the current update, if any. // // |progress| depends on the returned status: diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp index ce960f90d..a0ec0685b 100644 --- a/fs_mgr/libsnapshot/snapshot.cpp +++ b/fs_mgr/libsnapshot/snapshot.cpp @@ -74,6 +74,7 @@ using namespace std::chrono_literals; using namespace std::string_literals; static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot"; +static constexpr auto kUpdateStateCheckInterval = 2s; // Note: IImageManager is an incomplete type in the header, so the default // destructor doesn't work. @@ -731,7 +732,7 @@ UpdateState SnapshotManager::ProcessUpdateState(const std::function& cal // This wait is not super time sensitive, so we have a relatively // low polling frequency. - std::this_thread::sleep_for(2s); + std::this_thread::sleep_for(kUpdateStateCheckInterval); } } @@ -2241,6 +2242,21 @@ UpdateState SnapshotManager::InitiateMergeAndWait() { return state; } +bool SnapshotManager::WaitForMerge() { + LOG(INFO) << "Waiting for any previous merge request to complete. " + << "This can take up to several minutes."; + while (true) { + auto state = ProcessUpdateState(); + if (state == UpdateState::Unverified && GetCurrentSlot() == Slot::Target) { + LOG(INFO) << "Wait for merge to be initiated."; + std::this_thread::sleep_for(kUpdateStateCheckInterval); + continue; + } + LOG(INFO) << "Wait for merge exits with state " << state; + return state == UpdateState::None || state == UpdateState::MergeCompleted; + } +} + bool SnapshotManager::HandleImminentDataWipe(const std::function& callback) { if (!device_->IsRecovery()) { LOG(ERROR) << "Data wipes are only allowed in recovery."; diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp index df4eb0b11..0f5af141a 100644 --- a/fs_mgr/libsnapshot/snapshot_test.cpp +++ b/fs_mgr/libsnapshot/snapshot_test.cpp @@ -1546,6 +1546,45 @@ TEST_F(SnapshotUpdateTest, Overflow) { << "FinishedSnapshotWrites should detect overflow of CoW device."; } +TEST_F(SnapshotUpdateTest, WaitForMerge) { + AddOperationForPartitions(); + + // Execute the update. + ASSERT_TRUE(sm->BeginUpdate()); + ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_)); + + // Write some data to target partitions. + for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) { + ASSERT_TRUE(WriteSnapshotAndHash(name)); + } + + ASSERT_TRUE(sm->FinishedSnapshotWrites()); + + // Simulate shutting down the device. + ASSERT_TRUE(UnmapAll()); + + // After reboot, init does first stage mount. + { + auto init = SnapshotManager::NewForFirstStageMount(new TestDeviceInfo(fake_super, "_b")); + ASSERT_NE(nullptr, init); + ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super")); + } + + auto new_sm = SnapshotManager::New(new TestDeviceInfo(fake_super, "_b")); + ASSERT_NE(nullptr, new_sm); + + auto waiter = std::async(std::launch::async, [&new_sm] { return new_sm->WaitForMerge(); }); + ASSERT_EQ(std::future_status::timeout, waiter.wait_for(1s)) + << "WaitForMerge should block when not initiated"; + + auto merger = + std::async(std::launch::async, [&new_sm] { return new_sm->InitiateMergeAndWait(); }); + // Small images, so should be merged pretty quickly. + ASSERT_EQ(std::future_status::ready, waiter.wait_for(3s)) << "WaitForMerge did not finish"; + ASSERT_TRUE(waiter.get()); + ASSERT_THAT(merger.get(), AnyOf(UpdateState::None, UpdateState::MergeCompleted)); +} + class FlashAfterUpdateTest : public SnapshotUpdateTest, public WithParamInterface> { public: