From 63887881256561f54a7f652f4a601301ddf0f0d3 Mon Sep 17 00:00:00 2001 From: Peter Goodman Date: Thu, 5 Nov 2020 18:02:33 -0500 Subject: [PATCH] Rename arch_for_decode, remove lazy decode --- include/remill/Arch/Arch.h | 8 +- include/remill/Arch/Instruction.h | 4 +- lib/Arch/AArch64/Arch.cpp | 1 + lib/Arch/Arch.cpp | 5 - lib/Arch/Instruction.cpp | 16 +-- lib/Arch/SPARC32/Arch.cpp | 22 ++-- lib/Arch/SPARC64/Arch.cpp | 23 ++-- lib/Arch/X86/Arch.cpp | 187 +++++++++++++----------------- 8 files changed, 106 insertions(+), 160 deletions(-) diff --git a/include/remill/Arch/Arch.h b/include/remill/Arch/Arch.h index 4440e079c..74ee5ca58 100644 --- a/include/remill/Arch/Arch.h +++ b/include/remill/Arch/Arch.h @@ -63,7 +63,7 @@ struct Register { std::string name; // Name of the register. uint64_t offset; // Byte offset in `State`. - uint64_t size; // Size of this register. + uint64_t size; // Size of this register (in bytes). // LLVM type associated with the field in `State`. llvm::Type *type; @@ -199,12 +199,6 @@ class Arch { return this->DecodeInstruction(address, instr_bytes, inst); } - // Fully decode any control-flow transfer instructions, but only partially - // decode other instructions. - virtual bool LazyDecodeInstruction(uint64_t address, - std::string_view instr_bytes, - Instruction &inst) const; - // Maximum number of bytes in an instruction for this particular architecture. virtual uint64_t MaxInstructionSize(void) const = 0; diff --git a/include/remill/Arch/Instruction.h b/include/remill/Arch/Instruction.h index 753dfbbba..459c419d9 100644 --- a/include/remill/Arch/Instruction.h +++ b/include/remill/Arch/Instruction.h @@ -135,8 +135,6 @@ class Instruction { void Reset(void); - bool FinalizeDecode(void); - // Name of semantics function that implements this instruction. std::string function; @@ -160,7 +158,7 @@ class Instruction { // Pointer to the `remill::Arch` used to complete the decoding of this // instruction. 
- const Arch *arch_for_decode; + const Arch *arch; // Does the instruction require the use of the `__remill_atomic_begin` and // `__remill_atomic_end`? diff --git a/lib/Arch/AArch64/Arch.cpp b/lib/Arch/AArch64/Arch.cpp index 4bfeb3c10..eee339e7d 100644 --- a/lib/Arch/AArch64/Arch.cpp +++ b/lib/Arch/AArch64/Arch.cpp @@ -1193,6 +1193,7 @@ bool AArch64Arch::DecodeInstruction(uint64_t address, aarch64::InstData dinst = {}; auto bytes = reinterpret_cast(inst_bytes.data()); + inst.arch = this; inst.arch_name = arch_name; inst.pc = address; inst.next_pc = address + kInstructionSize; diff --git a/lib/Arch/Arch.cpp b/lib/Arch/Arch.cpp index bcb76740c..e9c81d55b 100644 --- a/lib/Arch/Arch.cpp +++ b/lib/Arch/Arch.cpp @@ -103,11 +103,6 @@ Arch::Arch(llvm::LLVMContext *context_, OSName os_name_, ArchName arch_name_) Arch::~Arch(void) {} -bool Arch::LazyDecodeInstruction(uint64_t address, std::string_view instr_bytes, - Instruction &inst) const { - return DecodeInstruction(address, instr_bytes, inst); -} - // Returns `true` if memory access are little endian byte ordered. 
bool Arch::MemoryAccessIsLittleEndian(void) const { return true; diff --git a/lib/Arch/Instruction.cpp b/lib/Arch/Instruction.cpp index c2eb72516..533e1f573 100644 --- a/lib/Arch/Instruction.cpp +++ b/lib/Arch/Instruction.cpp @@ -223,7 +223,7 @@ Instruction::Instruction(void) branch_taken_pc(0), branch_not_taken_pc(0), arch_name(kArchInvalid), - arch_for_decode(nullptr), + arch(nullptr), is_atomic_read_modify_write(false), has_branch_taken_delay_slot(false), has_branch_not_taken_delay_slot(false), @@ -242,24 +242,12 @@ void Instruction::Reset(void) { has_branch_not_taken_delay_slot = false; in_delay_slot = false; category = Instruction::kCategoryInvalid; - arch_for_decode = nullptr; + arch = nullptr; operands.clear(); function.clear(); bytes.clear(); } -bool Instruction::FinalizeDecode(void) { - if (!IsValid()) { - return false; - } else if (!arch_for_decode) { - return true; - } else { - auto ret = arch_for_decode->DecodeInstruction(pc, bytes, *this); - arch_for_decode = nullptr; - return ret; - } -} - std::string Instruction::Serialize(void) const { std::stringstream ss; ss << "("; diff --git a/lib/Arch/SPARC32/Arch.cpp b/lib/Arch/SPARC32/Arch.cpp index 03c656c53..0300a3c42 100644 --- a/lib/Arch/SPARC32/Arch.cpp +++ b/lib/Arch/SPARC32/Arch.cpp @@ -447,6 +447,17 @@ bool SPARC32Arch::NextInstructionIsDelayed(const Instruction &inst, // Decode an instruction. bool SPARC32Arch::DecodeInstruction( uint64_t address, std::string_view inst_bytes, Instruction &inst) const { + inst.pc = address; + inst.arch_name = arch_name; + inst.arch = this; + inst.category = Instruction::kCategoryInvalid; + inst.operands.clear(); + inst.next_pc = address + inst_bytes.size(); // Default fall-through. 
+ inst.branch_taken_pc = 0; + inst.branch_not_taken_pc = 0; + inst.has_branch_taken_delay_slot = false; + inst.has_branch_not_taken_delay_slot = false; + if (address % 4) { return false; } @@ -455,17 +466,6 @@ bool SPARC32Arch::DecodeInstruction( return false; } - inst.pc = address; - inst.next_pc = address + inst_bytes.size(); // Default fall-through. - inst.branch_taken_pc = 0; - inst.branch_not_taken_pc = 0; - inst.has_branch_taken_delay_slot = false; - inst.has_branch_not_taken_delay_slot = false; - inst.arch_name = arch_name; - inst.arch_for_decode = nullptr; - inst.category = Instruction::kCategoryInvalid; - inst.operands.clear(); - if (!inst.bytes.empty() && inst.bytes.data() == inst_bytes.data()) { inst.bytes.resize(inst_bytes.size()); } else { diff --git a/lib/Arch/SPARC64/Arch.cpp b/lib/Arch/SPARC64/Arch.cpp index dfb38868f..91c1b7729 100644 --- a/lib/Arch/SPARC64/Arch.cpp +++ b/lib/Arch/SPARC64/Arch.cpp @@ -407,24 +407,25 @@ bool SPARC64Arch::NextInstructionIsDelayed(const Instruction &inst, // Decode an instruction. bool SPARC64Arch::DecodeInstruction( uint64_t address, std::string_view inst_bytes, Instruction &inst) const { - if (address % 4) { - return false; - } - - if (inst_bytes.size() != 4 && inst_bytes.size() != 8) { - return false; - } inst.pc = address; + inst.arch_name = arch_name; + inst.arch = this; + inst.category = Instruction::kCategoryInvalid; + inst.operands.clear(); inst.next_pc = address + inst_bytes.size(); // Default fall-through. 
inst.branch_taken_pc = 0; inst.branch_not_taken_pc = 0; inst.has_branch_taken_delay_slot = false; inst.has_branch_not_taken_delay_slot = false; - inst.arch_name = arch_name; - inst.arch_for_decode = nullptr; - inst.category = Instruction::kCategoryInvalid; - inst.operands.clear(); + + if (address % 4) { + return false; + } + + if (inst_bytes.size() != 4 && inst_bytes.size() != 8) { + return false; + } if (!inst.bytes.empty() && inst.bytes.data() == inst_bytes.data()) { inst.bytes.resize(inst_bytes.size()); diff --git a/lib/Arch/X86/Arch.cpp b/lib/Arch/X86/Arch.cpp index 0d93999e2..f831989e2 100644 --- a/lib/Arch/X86/Arch.cpp +++ b/lib/Arch/X86/Arch.cpp @@ -791,16 +791,10 @@ class X86Arch final : public Arch { // Returns the name of the program counter register. std::string_view ProgramCounterRegisterName(void) const override; - // Decode an instuction. + // Decode an instruction. bool DecodeInstruction(uint64_t address, std::string_view inst_bytes, Instruction &inst) const override; - // Fully decode any control-flow transfer instructions, but only partially - // decode other instructions. To complete the decoding, call - // `Instruction::FinalizeDecode`. - bool LazyDecodeInstruction(uint64_t address, std::string_view inst_bytes, - Instruction &inst) const override; - // Maximum number of bytes in an instruction. uint64_t MaxInstructionSize(void) const override; @@ -815,9 +809,6 @@ class X86Arch final : public Arch { llvm::Function *bb_func) const override; private: - // Decode an instuction. - bool DecodeInstruction(uint64_t address, std::string_view inst_bytes, - Instruction &inst, bool is_lazy) const; X86Arch(void) = delete; }; @@ -950,11 +941,13 @@ llvm::DataLayout X86Arch::DataLayout(void) const { // Decode an instuction. 
bool X86Arch::DecodeInstruction(uint64_t address, std::string_view inst_bytes, - Instruction &inst, bool is_lazy) const { + Instruction &inst) const { inst.pc = address; + inst.arch = this; inst.arch_name = arch_name; inst.category = Instruction::kCategoryInvalid; + inst.operands.clear(); xed_decoded_inst_t xedd_; xed_decoded_inst_t *xedd = &xedd_; @@ -991,95 +984,93 @@ bool X86Arch::DecodeInstruction(uint64_t address, std::string_view inst_bytes, auto iform = xed_decoded_inst_get_iform_enum(xedd); - if (!is_lazy || inst.IsControlFlow()) { - inst.function = InstructionFunctionName(xedd); - - // Lift the operands. This creates the arguments for us to call the - // instuction implementation. - auto xedi = xed_decoded_inst_inst(xedd); - auto num_operands = xed_decoded_inst_noperands(xedd); - for (auto i = 0U; i < num_operands; ++i) { - auto xedo = xed_inst_operand(xedi, i); - if (XED_OPVIS_SUPPRESSED != xed_operand_operand_visibility(xedo)) { - DecodeOperand(inst, xedd, xedo); - } - } + inst.function = InstructionFunctionName(xedd); - // Control flow operands update the next program counter. - if (inst.IsControlFlow()) { - inst.operands.emplace_back(); - auto &dst_ret_pc = inst.operands.back(); - dst_ret_pc.type = Operand::kTypeRegister; - dst_ret_pc.action = Operand::kActionWrite; - dst_ret_pc.size = address_size; - dst_ret_pc.reg.name = "NEXT_PC"; - dst_ret_pc.reg.size = address_size; + // Lift the operands. This creates the arguments for us to call the + // instruction implementation. + auto xedi = xed_decoded_inst_inst(xedd); + auto num_operands = xed_decoded_inst_noperands(xedd); + for (auto i = 0U; i < num_operands; ++i) { + auto xedo = xed_inst_operand(xedi, i); + if (XED_OPVIS_SUPPRESSED != xed_operand_operand_visibility(xedo)) { + DecodeOperand(inst, xedd, xedo); } + } - if (inst.IsFunctionCall()) { - DecodeFallThroughPC(inst, xedd); - - // The semantics will store the return address in `RETURN_PC`. 
This is to - // help synchronize program counters when lifting instructions on an ISA - // with delay slots. - inst.operands.emplace_back(); - auto &dst_ret_pc = inst.operands.back(); - dst_ret_pc.type = Operand::kTypeRegister; - dst_ret_pc.action = Operand::kActionWrite; - dst_ret_pc.size = address_size; - dst_ret_pc.reg.name = "RETURN_PC"; - dst_ret_pc.reg.size = address_size; - } + // Control flow operands update the next program counter. + if (inst.IsControlFlow()) { + inst.operands.emplace_back(); + auto &dst_ret_pc = inst.operands.back(); + dst_ret_pc.type = Operand::kTypeRegister; + dst_ret_pc.action = Operand::kActionWrite; + dst_ret_pc.size = address_size; + dst_ret_pc.reg.name = "NEXT_PC"; + dst_ret_pc.reg.size = address_size; + } - if (UsesStopFailure(xedd)) { + if (inst.IsFunctionCall()) { + DecodeFallThroughPC(inst, xedd); + + // The semantics will store the return address in `RETURN_PC`. This is to + // help synchronize program counters when lifting instructions on an ISA + // with delay slots. + inst.operands.emplace_back(); + auto &dst_ret_pc = inst.operands.back(); + dst_ret_pc.type = Operand::kTypeRegister; + dst_ret_pc.action = Operand::kActionWrite; + dst_ret_pc.size = address_size; + dst_ret_pc.reg.name = "RETURN_PC"; + dst_ret_pc.reg.size = address_size; + } - // These instructions might fault and uses the StopFailure to recover. - // The new operand `next_pc` is added and the REG_PC is set to next_pc - // before calling the StopFailure + if (UsesStopFailure(xedd)) { - inst.operands.emplace_back(); - auto &next_pc = inst.operands.back(); - next_pc.type = Operand::kTypeRegister; - next_pc.action = Operand::kActionRead; - next_pc.size = address_size; - next_pc.reg.name = "NEXT_PC"; - next_pc.reg.size = address_size; - } + // These instructions might fault and uses the StopFailure to recover. 
+ // The new operand `next_pc` is added and the REG_PC is set to next_pc + // before calling the StopFailure - // All non-control FPU instructions update the last instruction pointer - // and opcode. - if (XED_ISA_SET_X87 == xed_decoded_inst_get_isa_set(xedd) || - XED_ISA_SET_FCMOV == xed_decoded_inst_get_isa_set(xedd) || - XED_CATEGORY_X87_ALU == xed_decoded_inst_get_category(xedd)) { - auto set_ip_dp = false; - const auto get_attr = xed_decoded_inst_get_attribute; - switch (iform) { - case XED_IFORM_FNOP: - case XED_IFORM_FINCSTP: - case XED_IFORM_FDECSTP: - case XED_IFORM_FFREE_X87: - case XED_IFORM_FFREEP_X87: set_ip_dp = true; break; - default: - set_ip_dp = !get_attr(xedd, XED_ATTRIBUTE_X87_CONTROL) && - !get_attr(xedd, XED_ATTRIBUTE_X87_MMX_STATE_CW) && - !get_attr(xedd, XED_ATTRIBUTE_X87_MMX_STATE_R) && - !get_attr(xedd, XED_ATTRIBUTE_X87_MMX_STATE_W) && - !get_attr(xedd, XED_ATTRIBUTE_X87_NOWAIT); - break; - } + inst.operands.emplace_back(); + auto &next_pc = inst.operands.back(); + next_pc.type = Operand::kTypeRegister; + next_pc.action = Operand::kActionRead; + next_pc.size = address_size; + next_pc.reg.name = "NEXT_PC"; + next_pc.reg.size = address_size; + } - if (set_ip_dp) { - DecodeX87LastIpDp(inst); - } + // All non-control FPU instructions update the last instruction pointer + // and opcode. 
+ if (XED_ISA_SET_X87 == xed_decoded_inst_get_isa_set(xedd) || + XED_ISA_SET_FCMOV == xed_decoded_inst_get_isa_set(xedd) || + XED_CATEGORY_X87_ALU == xed_decoded_inst_get_category(xedd)) { + auto set_ip_dp = false; + const auto get_attr = xed_decoded_inst_get_attribute; + switch (iform) { + case XED_IFORM_FNOP: + case XED_IFORM_FINCSTP: + case XED_IFORM_FDECSTP: + case XED_IFORM_FFREE_X87: + case XED_IFORM_FFREEP_X87: set_ip_dp = true; break; + default: + set_ip_dp = !get_attr(xedd, XED_ATTRIBUTE_X87_CONTROL) && + !get_attr(xedd, XED_ATTRIBUTE_X87_MMX_STATE_CW) && + !get_attr(xedd, XED_ATTRIBUTE_X87_MMX_STATE_R) && + !get_attr(xedd, XED_ATTRIBUTE_X87_MMX_STATE_W) && + !get_attr(xedd, XED_ATTRIBUTE_X87_NOWAIT); + break; } - if (xed_decoded_inst_is_xacquire(xedd) || - xed_decoded_inst_is_xrelease(xedd)) { - LOG(ERROR) << "Ignoring XACQUIRE/XRELEASE prefix at " << std::hex - << inst.pc << std::dec; + if (set_ip_dp) { + DecodeX87LastIpDp(inst); } } + if (xed_decoded_inst_is_xacquire(xedd) || + xed_decoded_inst_is_xrelease(xedd)) { + LOG(ERROR) << "Ignoring XACQUIRE/XRELEASE prefix at " << std::hex + << inst.pc << std::dec; + } + // Make sure we disallow decoding of AVX instructions when running with non- // AVX arch specified. Same thing for AVX512 instructions. switch (xed_decoded_inst_get_isa_set(xedd)) { @@ -1190,28 +1181,6 @@ std::string_view X86Arch::ProgramCounterRegisterName(void) const { return kPCNames[IsX86()]; } -bool X86Arch::DecodeInstruction(uint64_t address, std::string_view inst_bytes, - Instruction &inst) const { - inst.arch_for_decode = nullptr; - return DecodeInstruction(address, inst_bytes, inst, false); -} - -// Fully decode any control-flow transfer instructions, but only partially -// decode other instructions. 
-bool X86Arch::LazyDecodeInstruction(uint64_t address, - std::string_view inst_bytes, - Instruction &inst) const { - inst.arch_for_decode = nullptr; - if (DecodeInstruction(address, inst_bytes, inst, true)) { - if (!inst.IsControlFlow()) { - inst.arch_for_decode = this; - } - return true; - } else { - return false; - } -} - // Populate the `__remill_basic_block` function with variables. void X86Arch::PopulateBasicBlockFunction(llvm::Module *module, llvm::Function *bb_func) const {