Skip to content

Commit

Permalink
Merge changes from topic "wait_for_merge"
Browse files Browse the repository at this point in the history
* changes:
  libsnapshot: add WaitForMerge
  libsnapshot: SnapshotUpdateTest::AddOperation
  libsnapshot: add GetCurrentSlot


Former-commit-id: 18c6248ffeb1c790c5c57ffbdb0c6768bbbca537
  • Loading branch information
Yifan Hong authored and Gerrit Code Review committed Dec 17, 2019
2 parents 204628b + be67747 commit 51eb60f
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 66 deletions.
11 changes: 11 additions & 0 deletions fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,13 @@ class SnapshotManager final {
// - other states indicating an error has occurred
UpdateState InitiateMergeAndWait();

// Wait for the merge if rebooted into the new slot. Does NOT initiate a
// merge. If the merge has not been initiated (but should be), wait.
// Returns:
// - true if there is no merge or the merge finishes
// - false if an error has occurred
bool WaitForMerge();

// Find the status of the current update, if any.
//
// |progress| depends on the returned status:
Expand Down Expand Up @@ -496,6 +503,10 @@ class SnapshotManager final {
// as a sanity check.
bool EnsureNoOverflowSnapshot(LockedFile* lock);

// Result of comparing the device's current slot suffix with the contents of
// the snapshot boot indicator file:
// - Unknown: the boot indicator file could not be read.
// - Source: the current slot suffix equals the contents of the indicator file.
// - Target: the current slot suffix differs from the contents of the indicator file.
enum class Slot { Unknown, Source, Target };
// Streams a textual name for |slot|; used when dumping state for debugging.
friend std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot);
// Reads the snapshot boot indicator file and classifies the current slot as
// described on Slot above.
Slot GetCurrentSlot();

std::string gsid_dir_;
std::string metadata_dir_;
std::unique_ptr<IDeviceInfo> device_;
Expand Down
99 changes: 58 additions & 41 deletions fs_mgr/libsnapshot/snapshot.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ using namespace std::chrono_literals;
using namespace std::string_literals;

static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot";
// Sleep duration between successive polls of the update state while waiting
// for a merge to be initiated or to make progress.
static constexpr auto kUpdateStateCheckInterval = 2s;

// Note: IImageManager is an incomplete type in the header, so the default
// destructor doesn't work.
Expand Down Expand Up @@ -171,21 +172,29 @@ bool SnapshotManager::TryCancelUpdate(bool* needs_merge) {

if (state == UpdateState::Unverified) {
// We completed an update, but it can still be canceled if we haven't booted into it.
auto boot_file = GetSnapshotBootIndicatorPath();
std::string contents;
if (!android::base::ReadFileToString(boot_file, &contents)) {
PLOG(WARNING) << "Cannot read " << boot_file << ", proceed to canceling the update:";
return RemoveAllUpdateState(file.get());
}
if (device_->GetSlotSuffix() == contents) {
LOG(INFO) << "Canceling a previously completed update";
auto slot = GetCurrentSlot();
if (slot != Slot::Target) {
LOG(INFO) << "Canceling previously completed updates (if any)";
return RemoveAllUpdateState(file.get());
}
}
*needs_merge = true;
return true;
}

// Classifies the slot the device is currently running from by comparing the
// device's slot suffix against the contents of the snapshot boot indicator file.
//
// Returns:
//   - Slot::Unknown if the indicator file cannot be read (a warning is logged).
//   - Slot::Source if the current slot suffix equals the file contents.
//   - Slot::Target otherwise.
SnapshotManager::Slot SnapshotManager::GetCurrentSlot() {
    const auto indicator_path = GetSnapshotBootIndicatorPath();

    std::string recorded_suffix;
    const bool read_ok = android::base::ReadFileToString(indicator_path, &recorded_suffix);
    if (!read_ok) {
        PLOG(WARNING) << "Cannot read " << indicator_path;
        return Slot::Unknown;
    }

    // A matching suffix means the slot recorded in the indicator file is the
    // one we are running from.
    return device_->GetSlotSuffix() == recorded_suffix ? Slot::Source : Slot::Target;
}

bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock) {
if (!RemoveAllSnapshots(lock)) {
LOG(ERROR) << "Could not remove all snapshots";
Expand Down Expand Up @@ -505,15 +514,9 @@ bool SnapshotManager::InitiateMerge() {
return false;
}

std::string old_slot;
auto boot_file = GetSnapshotBootIndicatorPath();
if (!android::base::ReadFileToString(boot_file, &old_slot)) {
LOG(ERROR) << "Could not determine the previous slot; aborting merge";
return false;
}
auto new_slot = device_->GetSlotSuffix();
if (new_slot == old_slot) {
LOG(ERROR) << "Device cannot merge while booting off old slot " << old_slot;
auto slot = GetCurrentSlot();
if (slot != Slot::Target) {
LOG(ERROR) << "Device cannot merge while not booting from new slot";
return false;
}

Expand Down Expand Up @@ -729,7 +732,7 @@ UpdateState SnapshotManager::ProcessUpdateState(const std::function<void()>& cal

// This wait is not super time sensitive, so we have a relatively
// low polling frequency.
std::this_thread::sleep_for(2s);
std::this_thread::sleep_for(kUpdateStateCheckInterval);
}
}

Expand Down Expand Up @@ -1097,13 +1100,11 @@ bool SnapshotManager::CollapseSnapshotDevice(const std::string& name,
}

bool SnapshotManager::HandleCancelledUpdate(LockedFile* lock) {
std::string old_slot;
auto boot_file = GetSnapshotBootIndicatorPath();
if (!android::base::ReadFileToString(boot_file, &old_slot)) {
PLOG(ERROR) << "Unable to read the snapshot indicator file: " << boot_file;
auto slot = GetCurrentSlot();
if (slot == Slot::Unknown) {
return false;
}
if (device_->GetSlotSuffix() != old_slot) {
if (slot == Slot::Target) {
// We're booted into the target slot, which means we just rebooted
// after applying the update.
if (!HandleCancelledUpdateOnNewSlot(lock)) {
Expand Down Expand Up @@ -1271,14 +1272,9 @@ bool SnapshotManager::NeedSnapshotsInFirstStageMount() {
// ultimately we'll fail to boot. Why not make it a fatal error and have
// the reason be clearer? Because the indicator file still exists, and
// if this was FATAL, reverting to the old slot would be broken.
std::string old_slot;
auto boot_file = GetSnapshotBootIndicatorPath();
if (!android::base::ReadFileToString(boot_file, &old_slot)) {
PLOG(ERROR) << "Unable to read the snapshot indicator file: " << boot_file;
return false;
}
if (device_->GetSlotSuffix() == old_slot) {
LOG(INFO) << "Detected slot rollback, will not mount snapshots.";
auto slot = GetCurrentSlot();
if (slot != Slot::Target) {
LOG(INFO) << "Not booting from new slot. Will not mount snapshots.";
return false;
}

Expand Down Expand Up @@ -2156,6 +2152,17 @@ bool SnapshotManager::UnmapAllPartitions() {
return ok;
}

// Writes a human-readable name for |slot| to |os| and returns |os|.
//
// The three enumerators print as "unknown", "source" and "target". Any other
// value (only possible through an explicit cast) prints as "invalid(<n>)"
// instead of flowing off the end of the function, which would be undefined
// behavior.
std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot) {
    // Deliberately no `default:` label so the compiler can still warn when a
    // new enumerator is added without a matching case.
    switch (slot) {
        case SnapshotManager::Slot::Unknown:
            return os << "unknown";
        case SnapshotManager::Slot::Source:
            return os << "source";
        case SnapshotManager::Slot::Target:
            return os << "target";
    }
    // Only reached for a value outside the declared enumerators.
    return os << "invalid(" << static_cast<int>(slot) << ")";
}

bool SnapshotManager::Dump(std::ostream& os) {
// Don't actually lock. Dump() is for debugging purposes only, so it is okay
// if it is racy.
Expand All @@ -2166,11 +2173,8 @@ bool SnapshotManager::Dump(std::ostream& os) {

ss << "Update state: " << ReadUpdateState(file.get()) << std::endl;

auto boot_file = GetSnapshotBootIndicatorPath();
std::string boot_indicator;
if (android::base::ReadFileToString(boot_file, &boot_indicator)) {
ss << "Boot indicator: old slot = " << boot_indicator << std::endl;
}
ss << "Current slot: " << device_->GetSlotSuffix() << std::endl;
ss << "Boot indicator: booting from " << GetCurrentSlot() << " slot" << std::endl;

bool ok = true;
std::vector<std::string> snapshots;
Expand Down Expand Up @@ -2238,6 +2242,21 @@ UpdateState SnapshotManager::InitiateMergeAndWait() {
return state;
}

// Blocks until a merge is no longer pending, without initiating one.
//
// Each iteration calls ProcessUpdateState(). While the state is Unverified and
// GetCurrentSlot() reports Slot::Target, the merge is treated as "not yet
// initiated": the function sleeps for kUpdateStateCheckInterval and polls again.
// Any other combination ends the wait.
//
// Returns true if the final state is None or MergeCompleted, false otherwise.
bool SnapshotManager::WaitForMerge() {
    LOG(INFO) << "Waiting for any previous merge request to complete. "
              << "This can take up to several minutes.";
    for (;;) {
        const auto update_state = ProcessUpdateState();

        // The slot is only queried when the state is Unverified.
        const bool awaiting_initiation =
                update_state == UpdateState::Unverified && GetCurrentSlot() == Slot::Target;
        if (!awaiting_initiation) {
            LOG(INFO) << "Wait for merge exits with state " << update_state;
            return update_state == UpdateState::None ||
                   update_state == UpdateState::MergeCompleted;
        }

        LOG(INFO) << "Wait for merge to be initiated.";
        std::this_thread::sleep_for(kUpdateStateCheckInterval);
    }
}

bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callback) {
if (!device_->IsRecovery()) {
LOG(ERROR) << "Data wipes are only allowed in recovery.";
Expand Down Expand Up @@ -2283,11 +2302,9 @@ bool SnapshotManager::HandleImminentDataWipe(const std::function<void()>& callba
//
// Since the rollback is inevitable, we don't treat a HAL failure
// as an error here.
std::string old_slot;
auto boot_file = GetSnapshotBootIndicatorPath();
if (android::base::ReadFileToString(boot_file, &old_slot) &&
device_->GetSlotSuffix() != old_slot) {
LOG(ERROR) << "Reverting to slot " << old_slot << " since update will be deleted.";
auto slot = GetCurrentSlot();
if (slot == Slot::Target) {
LOG(ERROR) << "Reverting to old slot since update will be deleted.";
device_->SetSlotAsUnbootable(slot_number);
}
break;
Expand Down
89 changes: 64 additions & 25 deletions fs_mgr/libsnapshot/snapshot_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -921,6 +921,25 @@ class SnapshotUpdateTest : public SnapshotTest {
return AssertionSuccess();
}

// Create fake install operations to grow the COW device size.
// Appends one fake install operation to |partition_update| whose single
// destination extent starts at block 0 and spans |size_bytes| worth of blocks
// (using the manifest's block size). Passing 0 for |size_bytes| uses
// GetSize(partition_update) instead. The test fixtures use this to grow the
// COW device size.
void AddOperation(PartitionUpdate* partition_update, uint64_t size_bytes = 0) {
    auto* extent = partition_update->add_operations()->add_dst_extents();
    extent->set_start_block(0);

    // Zero is the "use the partition's own size" sentinel.
    const uint64_t effective_bytes =
            (size_bytes != 0) ? size_bytes : GetSize(partition_update);
    extent->set_num_blocks(effective_bytes / manifest_.block_size());
}

// Calls AddOperation() with the default size on every entry of |partitions|.
// An empty list (the default) selects sys_, vnd_ and prd_.
void AddOperationForPartitions(std::vector<PartitionUpdate*> partitions = {}) {
    const std::vector<PartitionUpdate*> fixture_partitions{sys_, vnd_, prd_};
    const auto& targets = partitions.empty() ? fixture_partitions : partitions;
    for (PartitionUpdate* target : targets) {
        AddOperation(target);
    }
}

std::unique_ptr<TestPartitionOpener> opener_;
DeltaArchiveManifest manifest_;
std::unique_ptr<MetadataBuilder> src_;
Expand Down Expand Up @@ -948,12 +967,7 @@ TEST_F(SnapshotUpdateTest, FullUpdateFlow) {
SetSize(vnd_, partition_size);
SetSize(prd_, partition_size);

// Create fake install operations to grow the COW device size.
for (auto& partition : {sys_, vnd_, prd_}) {
auto e = partition->add_operations()->add_dst_extents();
e->set_start_block(0);
e->set_num_blocks(GetSize(partition) / manifest_.block_size());
}
AddOperationForPartitions();

// Execute the update.
ASSERT_TRUE(sm->BeginUpdate());
Expand Down Expand Up @@ -1089,12 +1103,7 @@ TEST_F(SnapshotUpdateTest, TestRollback) {
ASSERT_TRUE(sm->BeginUpdate());
ASSERT_TRUE(sm->UnmapUpdateSnapshot("sys_b"));

// Create fake install operations to grow the COW device size.
for (auto& partition : {sys_, vnd_, prd_}) {
auto e = partition->add_operations()->add_dst_extents();
e->set_start_block(0);
e->set_num_blocks(GetSize(partition) / manifest_.block_size());
}
AddOperationForPartitions();

ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_));

Expand Down Expand Up @@ -1239,10 +1248,8 @@ TEST_F(SnapshotUpdateTest, RetrofitAfterRegularAb) {
group_->set_size(kRetrofitGroupSize);
for (auto* partition : {sys_, vnd_, prd_}) {
SetSize(partition, 2_MiB);
auto* e = partition->add_operations()->add_dst_extents();
e->set_start_block(0);
e->set_num_blocks(2_MiB / manifest_.block_size());
}
AddOperationForPartitions();

ASSERT_TRUE(sm->BeginUpdate());
ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_));
Expand Down Expand Up @@ -1285,9 +1292,7 @@ TEST_F(SnapshotUpdateTest, MergeCannotRemoveCow) {
}

// Add operations for sys. The whole device is written.
auto e = sys_->add_operations()->add_dst_extents();
e->set_start_block(0);
e->set_num_blocks(GetSize(sys_) / manifest_.block_size());
AddOperation(sys_);

// Execute the update.
ASSERT_TRUE(sm->BeginUpdate());
Expand Down Expand Up @@ -1477,10 +1482,7 @@ TEST_F(SnapshotUpdateTest, Hashtree) {

const auto block_size = manifest_.block_size();
SetSize(sys_, partition_size);

auto e = sys_->add_operations()->add_dst_extents();
e->set_start_block(0);
e->set_num_blocks(data_size / block_size);
AddOperation(sys_, data_size);

// Set hashtree extents.
sys_->mutable_hash_tree_data_extent()->set_start_block(0);
Expand Down Expand Up @@ -1525,9 +1527,7 @@ TEST_F(SnapshotUpdateTest, Overflow) {
const auto actual_write_size = GetSize(sys_);
const auto declared_write_size = actual_write_size - 1_MiB;

auto e = sys_->add_operations()->add_dst_extents();
e->set_start_block(0);
e->set_num_blocks(declared_write_size / manifest_.block_size());
AddOperation(sys_, declared_write_size);

// Execute the update.
ASSERT_TRUE(sm->BeginUpdate());
Expand All @@ -1546,6 +1546,45 @@ TEST_F(SnapshotUpdateTest, Overflow) {
<< "FinishedSnapshotWrites should detect overflow of CoW device.";
}

// Checks that WaitForMerge() keeps blocking while no merge has been initiated
// and returns true once InitiateMergeAndWait() has been started on the same
// manager.
TEST_F(SnapshotUpdateTest, WaitForMerge) {
    AddOperationForPartitions();

    // Run the update up to the point where all snapshot writes are finished.
    ASSERT_TRUE(sm->BeginUpdate());
    ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_));

    // Put some data on each target partition.
    for (const auto& target_name : {"sys_b", "vnd_b", "prd_b"}) {
        ASSERT_TRUE(WriteSnapshotAndHash(target_name));
    }

    ASSERT_TRUE(sm->FinishedSnapshotWrites());

    // Pretend the device shuts down.
    ASSERT_TRUE(UnmapAll());

    // Pretend the device comes back up: init performs first stage mount using
    // a device info constructed with the "_b" suffix.
    {
        auto first_stage_sm =
                SnapshotManager::NewForFirstStageMount(new TestDeviceInfo(fake_super, "_b"));
        ASSERT_NE(nullptr, first_stage_sm);
        ASSERT_TRUE(first_stage_sm->CreateLogicalAndSnapshotPartitions("super"));
    }

    auto rebooted_sm = SnapshotManager::New(new TestDeviceInfo(fake_super, "_b"));
    ASSERT_NE(nullptr, rebooted_sm);

    // Nothing has initiated a merge yet, so the wait must still be pending
    // after one second.
    auto wait_result = std::async(std::launch::async,
                                  [&rebooted_sm] { return rebooted_sm->WaitForMerge(); });
    ASSERT_EQ(std::future_status::timeout, wait_result.wait_for(1s))
            << "WaitForMerge should block when not initiated";

    // Kick off the merge on another thread; the pending wait should now finish.
    auto merge_result = std::async(std::launch::async,
                                   [&rebooted_sm] { return rebooted_sm->InitiateMergeAndWait(); });
    // Small images, so should be merged pretty quickly.
    ASSERT_EQ(std::future_status::ready, wait_result.wait_for(3s))
            << "WaitForMerge did not finish";
    ASSERT_TRUE(wait_result.get());
    ASSERT_THAT(merge_result.get(), AnyOf(UpdateState::None, UpdateState::MergeCompleted));
}

class FlashAfterUpdateTest : public SnapshotUpdateTest,
public WithParamInterface<std::tuple<uint32_t, bool>> {
public:
Expand Down

0 comments on commit 51eb60f

Please sign in to comment.