diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h index 5738b9633..445e6dbed 100644 --- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h +++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h @@ -198,6 +198,13 @@ class SnapshotManager final { // - other states indicating an error has occurred UpdateState InitiateMergeAndWait(); + // Wait for the merge if rebooted into the new slot. Does NOT initiate a + // merge. If the merge has not been initiated (but should be), wait. + // Returns: + // - true there is no merge or merge finishes + // - false indicating an error has occurred + bool WaitForMerge(); + // Find the status of the current update, if any. // // |progress| depends on the returned status: @@ -496,6 +503,10 @@ class SnapshotManager final { // as a sanity check. bool EnsureNoOverflowSnapshot(LockedFile* lock); + enum class Slot { Unknown, Source, Target }; + friend std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot); + Slot GetCurrentSlot(); + std::string gsid_dir_; std::string metadata_dir_; std::unique_ptr device_; diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp index f38db4312..a0ec0685b 100644 --- a/fs_mgr/libsnapshot/snapshot.cpp +++ b/fs_mgr/libsnapshot/snapshot.cpp @@ -74,6 +74,7 @@ using namespace std::chrono_literals; using namespace std::string_literals; static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot"; +static constexpr auto kUpdateStateCheckInterval = 2s; // Note: IImageManager is an incomplete type in the header, so the default // destructor doesn't work. @@ -171,14 +172,9 @@ bool SnapshotManager::TryCancelUpdate(bool* needs_merge) { if (state == UpdateState::Unverified) { // We completed an update, but it can still be canceled if we haven't booted into it. - auto boot_file = GetSnapshotBootIndicatorPath(); - std::string contents; - if (!android::base::ReadFileToString(boot_file, &contents)) { - PLOG(WARNING) << "Cannot read " << boot_file << ", proceed to canceling the update:"; - return RemoveAllUpdateState(file.get()); - } - if (device_->GetSlotSuffix() == contents) { - LOG(INFO) << "Canceling a previously completed update"; + auto slot = GetCurrentSlot(); + if (slot != Slot::Target) { + LOG(INFO) << "Canceling previously completed updates (if any)"; return RemoveAllUpdateState(file.get()); } } @@ -186,6 +182,19 @@ bool SnapshotManager::TryCancelUpdate(bool* needs_merge) { return true; } +SnapshotManager::Slot SnapshotManager::GetCurrentSlot() { + auto boot_file = GetSnapshotBootIndicatorPath(); + std::string contents; + if (!android::base::ReadFileToString(boot_file, &contents)) { + PLOG(WARNING) << "Cannot read " << boot_file; + return Slot::Unknown; + } + if (device_->GetSlotSuffix() == contents) { + return Slot::Source; + } + return Slot::Target; +} + bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock) { if (!RemoveAllSnapshots(lock)) { LOG(ERROR) << "Could not remove all snapshots"; @@ -505,15 +514,9 @@ bool SnapshotManager::InitiateMerge() { return false; } - std::string old_slot; - auto boot_file = GetSnapshotBootIndicatorPath(); - if (!android::base::ReadFileToString(boot_file, &old_slot)) { - LOG(ERROR) << "Could not determine the previous slot; aborting merge"; - return false; - } - auto new_slot = device_->GetSlotSuffix(); - if (new_slot == old_slot) { - LOG(ERROR) << "Device cannot merge while booting off old slot " << old_slot; + auto slot = GetCurrentSlot(); + if (slot != Slot::Target) { + LOG(ERROR) << "Device cannot merge while not booting from new slot"; return false; } @@ -729,7 +732,7 @@ UpdateState SnapshotManager::ProcessUpdateState(const std::function& cal // This wait is not super time sensitive, so we have a relatively // low polling frequency. - std::this_thread::sleep_for(2s); + std::this_thread::sleep_for(kUpdateStateCheckInterval); } } @@ -1097,13 +1100,11 @@ bool SnapshotManager::CollapseSnapshotDevice(const std::string& name, } bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock) { - std::string old_slot; - auto boot_file = GetSnapshotBootIndicatorPath(); - if (!android::base::ReadFileToString(boot_file, &old_slot)) { - PLOG(ERROR) << "Unable to read the snapshot indicator file: " << boot_file; + auto slot = GetCurrentSlot(); + if (slot == Slot::Unknown) { return false; } - if (device_->GetSlotSuffix() != old_slot) { + if (slot == Slot::Target) { // We're booted into the target slot, which means we just rebooted // after applying the update. if (!HandleCancelledUpdateOnNewSlot(lock)) { @@ -1271,14 +1272,9 @@ bool SnapshotManager::NeedSnapshotsInFirstStageMount() { // ultimately we'll fail to boot. Why not make it a fatal error and have // the reason be clearer? Because the indicator file still exists, and // if this was FATAL, reverting to the old slot would be broken. - std::string old_slot; - auto boot_file = GetSnapshotBootIndicatorPath(); - if (!android::base::ReadFileToString(boot_file, &old_slot)) { - PLOG(ERROR) << "Unable to read the snapshot indicator file: " << boot_file; - return false; - } - if (device_->GetSlotSuffix() == old_slot) { - LOG(INFO) << "Detected slot rollback, will not mount snapshots."; + auto slot = GetCurrentSlot(); + if (slot != Slot::Target) { + LOG(INFO) << "Not booting from new slot. Will not mount snapshots."; return false; } @@ -2156,6 +2152,17 @@ bool SnapshotManager::UnmapAllPartitions() { return ok; } +std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot) { + switch (slot) { + case SnapshotManager::Slot::Unknown: + return os << "unknown"; + case SnapshotManager::Slot::Source: + return os << "source"; + case SnapshotManager::Slot::Target: + return os << "target"; + } +} + bool SnapshotManager::Dump(std::ostream& os) { // Don't actually lock. Dump() is for debugging purposes only, so it is okay // if it is racy. @@ -2166,11 +2173,8 @@ bool SnapshotManager::Dump(std::ostream& os) { ss << "Update state: " << ReadUpdateState(file.get()) << std::endl; - auto boot_file = GetSnapshotBootIndicatorPath(); - std::string boot_indicator; - if (android::base::ReadFileToString(boot_file, &boot_indicator)) { - ss << "Boot indicator: old slot = " << boot_indicator << std::endl; - } + ss << "Current slot: " << device_->GetSlotSuffix() << std::endl; + ss << "Boot indicator: booting from " << GetCurrentSlot() << " slot" << std::endl; bool ok = true; std::vector snapshots; @@ -2238,6 +2242,21 @@ UpdateState SnapshotManager::InitiateMergeAndWait() { return state; } +bool SnapshotManager::WaitForMerge() { + LOG(INFO) << "Waiting for any previous merge request to complete. " + << "This can take up to several minutes."; + while (true) { + auto state = ProcessUpdateState(); + if (state == UpdateState::Unverified && GetCurrentSlot() == Slot::Target) { + LOG(INFO) << "Wait for merge to be initiated."; + std::this_thread::sleep_for(kUpdateStateCheckInterval); + continue; + } + LOG(INFO) << "Wait for merge exits with state " << state; + return state == UpdateState::None || state == UpdateState::MergeCompleted; + } +} + bool SnapshotManager::HandleImminentDataWipe(const std::function& callback) { if (!device_->IsRecovery()) { LOG(ERROR) << "Data wipes are only allowed in recovery."; @@ -2283,11 +2302,9 @@ bool SnapshotManager::HandleImminentDataWipe(const std::function& callba // // Since the rollback is inevitable, we don't treat a HAL failure // as an error here. - std::string old_slot; - auto boot_file = GetSnapshotBootIndicatorPath(); - if (android::base::ReadFileToString(boot_file, &old_slot) && - device_->GetSlotSuffix() != old_slot) { - LOG(ERROR) << "Reverting to slot " << old_slot << " since update will be deleted."; + auto slot = GetCurrentSlot(); + if (slot == Slot::Target) { + LOG(ERROR) << "Reverting to old slot since update will be deleted."; device_->SetSlotAsUnbootable(slot_number); } break; diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp index 964b21a69..0f5af141a 100644 --- a/fs_mgr/libsnapshot/snapshot_test.cpp +++ b/fs_mgr/libsnapshot/snapshot_test.cpp @@ -921,6 +921,25 @@ class SnapshotUpdateTest : public SnapshotTest { return AssertionSuccess(); } + // Create fake install operations to grow the COW device size. + void AddOperation(PartitionUpdate* partition_update, uint64_t size_bytes = 0) { + auto e = partition_update->add_operations()->add_dst_extents(); + e->set_start_block(0); + if (size_bytes == 0) { + size_bytes = GetSize(partition_update); + } + e->set_num_blocks(size_bytes / manifest_.block_size()); + } + + void AddOperationForPartitions(std::vector partitions = {}) { + if (partitions.empty()) { + partitions = {sys_, vnd_, prd_}; + } + for (auto* partition : partitions) { + AddOperation(partition); + } + } + std::unique_ptr opener_; DeltaArchiveManifest manifest_; std::unique_ptr src_; @@ -948,12 +967,7 @@ TEST_F(SnapshotUpdateTest, FullUpdateFlow) { SetSize(vnd_, partition_size); SetSize(prd_, partition_size); - // Create fake install operations to grow the COW device size. - for (auto& partition : {sys_, vnd_, prd_}) { - auto e = partition->add_operations()->add_dst_extents(); - e->set_start_block(0); - e->set_num_blocks(GetSize(partition) / manifest_.block_size()); - } + AddOperationForPartitions(); // Execute the update. ASSERT_TRUE(sm->BeginUpdate()); @@ -1089,12 +1103,7 @@ TEST_F(SnapshotUpdateTest, TestRollback) { ASSERT_TRUE(sm->BeginUpdate()); ASSERT_TRUE(sm->UnmapUpdateSnapshot("sys_b")); - // Create fake install operations to grow the COW device size. - for (auto& partition : {sys_, vnd_, prd_}) { - auto e = partition->add_operations()->add_dst_extents(); - e->set_start_block(0); - e->set_num_blocks(GetSize(partition) / manifest_.block_size()); - } + AddOperationForPartitions(); ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_)); @@ -1239,10 +1248,8 @@ TEST_F(SnapshotUpdateTest, RetrofitAfterRegularAb) { group_->set_size(kRetrofitGroupSize); for (auto* partition : {sys_, vnd_, prd_}) { SetSize(partition, 2_MiB); - auto* e = partition->add_operations()->add_dst_extents(); - e->set_start_block(0); - e->set_num_blocks(2_MiB / manifest_.block_size()); } + AddOperationForPartitions(); ASSERT_TRUE(sm->BeginUpdate()); ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_)); @@ -1285,9 +1292,7 @@ TEST_F(SnapshotUpdateTest, MergeCannotRemoveCow) { } // Add operations for sys. The whole device is written. - auto e = sys_->add_operations()->add_dst_extents(); - e->set_start_block(0); - e->set_num_blocks(GetSize(sys_) / manifest_.block_size()); + AddOperation(sys_); // Execute the update. ASSERT_TRUE(sm->BeginUpdate()); @@ -1477,10 +1482,7 @@ TEST_F(SnapshotUpdateTest, Hashtree) { const auto block_size = manifest_.block_size(); SetSize(sys_, partition_size); - - auto e = sys_->add_operations()->add_dst_extents(); - e->set_start_block(0); - e->set_num_blocks(data_size / block_size); + AddOperation(sys_, data_size); // Set hastree extents. sys_->mutable_hash_tree_data_extent()->set_start_block(0); @@ -1525,9 +1527,7 @@ TEST_F(SnapshotUpdateTest, Overflow) { const auto actual_write_size = GetSize(sys_); const auto declared_write_size = actual_write_size - 1_MiB; - auto e = sys_->add_operations()->add_dst_extents(); - e->set_start_block(0); - e->set_num_blocks(declared_write_size / manifest_.block_size()); + AddOperation(sys_, declared_write_size); // Execute the update. ASSERT_TRUE(sm->BeginUpdate()); @@ -1546,6 +1546,45 @@ TEST_F(SnapshotUpdateTest, Overflow) { << "FinishedSnapshotWrites should detect overflow of CoW device."; } +TEST_F(SnapshotUpdateTest, WaitForMerge) { + AddOperationForPartitions(); + + // Execute the update. + ASSERT_TRUE(sm->BeginUpdate()); + ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_)); + + // Write some data to target partitions. + for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) { + ASSERT_TRUE(WriteSnapshotAndHash(name)); + } + + ASSERT_TRUE(sm->FinishedSnapshotWrites()); + + // Simulate shutting down the device. + ASSERT_TRUE(UnmapAll()); + + // After reboot, init does first stage mount. + { + auto init = SnapshotManager::NewForFirstStageMount(new TestDeviceInfo(fake_super, "_b")); + ASSERT_NE(nullptr, init); + ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super")); + } + + auto new_sm = SnapshotManager::New(new TestDeviceInfo(fake_super, "_b")); + ASSERT_NE(nullptr, new_sm); + + auto waiter = std::async(std::launch::async, [&new_sm] { return new_sm->WaitForMerge(); }); + ASSERT_EQ(std::future_status::timeout, waiter.wait_for(1s)) + << "WaitForMerge should block when not initiated"; + + auto merger = + std::async(std::launch::async, [&new_sm] { return new_sm->InitiateMergeAndWait(); }); + // Small images, so should be merged pretty quickly. + ASSERT_EQ(std::future_status::ready, waiter.wait_for(3s)) << "WaitForMerge did not finish"; + ASSERT_TRUE(waiter.get()); + ASSERT_THAT(merger.get(), AnyOf(UpdateState::None, UpdateState::MergeCompleted)); +} + class FlashAfterUpdateTest : public SnapshotUpdateTest, public WithParamInterface> { public: