Skip to content

Commit

Permalink
Merge branch 'master' of github.com:lifting-bits/remill
Browse files Browse the repository at this point in the history
  • Loading branch information
pgoodman committed Nov 2, 2020
2 parents 2e334f8 + 5bc0dfc commit d690125
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 119 deletions.
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ FROM trailofbits/cxx-common:llvm${LLVM_VERSION}-${DISTRO_BASE}-${ARCH} as deps

ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && \
RUN if [ "$(uname -m)" = "aarch64" ]; then dpkg --add-architecture armhf; fi && \
apt-get update && \
if [ "$(uname -m)" = "x86_64" ]; then apt-get install -qqy gcc-multilib g++-multilib; fi && \
if [ "$(uname -m)" = "aarch64" ]; then apt-get install -qqy gcc-arm-linux-gnueabihf libstdc++-8-dev:armhf; fi && \
apt-get install -qqy zlib1g-dev libz3-4 ninja-build ccache git python3 curl coreutils build-essential libtinfo-dev lsb-release && \
rm -rf /var/lib/apt/lists/*

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<img src="docs/images/remill_logo.png" />
</p>

Remill is a static binary translator that translates machine code instructions into [LLVM bitcode](http://llvm.org/docs/LangRef.html). It translates AArch64 (64-bit ARMv8), x86 and amd64 machine code (including AVX and AVX512) into LLVM bitcode. AArch32 (32-bit ARMv8 / ARMv7) support is underway.
Remill is a static binary translator that translates machine code instructions into [LLVM bitcode](http://llvm.org/docs/LangRef.html). It translates AArch64 (64-bit ARMv8), SPARC32 (SPARCv8), SPARC64 (SPARCv9), x86 and amd64 machine code (including AVX and AVX512) into LLVM bitcode. AArch32 (32-bit ARMv8 / ARMv7) support is underway.

Remill focuses on accurately lifting instructions. It is meant to be used as a library for other tools, e.g. [McSema](https://github.com/lifting-bits/mcsema).

Expand Down
2 changes: 1 addition & 1 deletion include/remill/Arch/Runtime/Types.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ typedef uint64_t addr64_t;
typedef IF_64BIT_ELSE(addr64_t, addr32_t) addr_t;
typedef IF_64BIT_ELSE(int64_t, int32_t) addr_diff_t;

#if defined(__x86_64__) || defined(__i386__) || defined(_M_X86)
#if defined(__x86_64__) || defined(__i386__) || defined(_M_X86) || defined (__arm__)
typedef unsigned uint128_t __attribute__((mode(TI)));
typedef int int128_t __attribute__((mode(TI)));
#elif defined(__aarch64__)
Expand Down
6 changes: 3 additions & 3 deletions lib/Arch/AArch64/Runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
# limitations under the License.

cmake_minimum_required(VERSION 3.2)
project(arm_runtime)
project(AARCH64_runtime)

set(ARMRUNTIME_SOURCEFILES
set(AARCH64RUNTIME_SOURCEFILES
Instructions.cpp
BasicBlock.cpp

Expand All @@ -36,7 +36,7 @@ function(add_runtime_helper target_name address_bit_size little_endian)
endif()

add_runtime(${target_name}
SOURCES ${ARMRUNTIME_SOURCEFILES}
SOURCES ${AARCH64RUNTIME_SOURCEFILES}
ADDRESS_SIZE ${address_bit_size}
DEFINITIONS "LITTLE_ENDIAN=${little_endian}"
BCFLAGS "-std=${required_cpp_standard}"
Expand Down
16 changes: 15 additions & 1 deletion lib/Arch/SPARC32/Runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,26 @@ endif(REMILL_BARRIER_AS_NOP)

function(add_runtime_helper target_name little_endian)
message(" > Generating runtime target: ${target_name}")
# Visual C++ requires C++14
if(WIN32)
set(required_cpp_standard "c++14")
else()
set(required_cpp_standard "c++17")
endif()

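# The runtime must be built as 32-bit code. On an AArch64 host this is done
# by targeting 32-bit ARM (arm-linux-gnueabihf); elsewhere `-m32` is used.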
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)")
set(arch_flags "--target=arm-linux-gnueabihf")
else()
set(arch_flags "-m32")
endif()

add_runtime(${target_name}
SOURCES ${SPARC32RUNTIME_SOURCEFILES}
ADDRESS_SIZE 32
DEFINITIONS "LITTLE_ENDIAN=${little_endian}"
BCFLAGS "-m32" "-std=c++17" "${EXTRA_BC_FLAGS}"
BCFLAGS "${arch_flags}" "-std=${required_cpp_standard}" "${EXTRA_BC_FLAGS}"
INCLUDEDIRECTORIES "${REMILL_INCLUDE_DIR}" "${REMILL_SOURCE_DIR}"
INSTALLDESTINATION "${REMILL_INSTALL_SEMANTICS_DIR}"

Expand Down
6 changes: 4 additions & 2 deletions tests/AArch64/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@ target_compile_options(lift-aarch64-tests
-DIN_TEST_GENERATOR
)

target_link_libraries(lift-aarch64-tests PUBLIC remill ${PROJECT_LIBRARIES})
target_link_libraries(lift-aarch64-tests PUBLIC remill ${PROJECT_LIBRARIES} )
target_include_directories(lift-aarch64-tests PUBLIC ${PROJECT_INCLUDEDIRECTORIES})
target_include_directories(lift-aarch64-tests PRIVATE ${CMAKE_SOURCE_DIR})

add_executable(run-aarch64-tests
EXCLUDE_FROM_ALL
Expand Down Expand Up @@ -71,9 +72,10 @@ add_custom_command(

target_link_libraries(run-aarch64-tests PUBLIC remill ${PROJECT_LIBRARIES})
target_include_directories(run-aarch64-tests PUBLIC ${PROJECT_INCLUDEDIRECTORIES})
target_include_directories(run-aarch64-tests PRIVATE ${CMAKE_SOURCE_DIR})

target_compile_options(run-aarch64-tests
PRIVATE -I${CMAKE_SOURCE_DIR}
PRIVATE #-I${CMAKE_SOURCE_DIR}
-DADDRESS_SIZE_BITS=64
-DGTEST_HAS_RTTI=0
-DGTEST_HAS_TR1_TUPLE=0
Expand Down
180 changes: 70 additions & 110 deletions tests/AArch64/Lift.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017 Trail of Bits, Inc.
* Copyright (c) 2018 Trail of Bits, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -52,140 +52,100 @@ DEFINE_string(bc_out, "",
DECLARE_string(arch);
DECLARE_string(os);

extern "C" {
int gNativeState [[gnu::used]] = 0;
int gLiftedState [[gnu::used]] = 0;
} // extern

namespace {

// Decode a test and add it as a basic block to the module.
//
// TODO(pag): Eventually handle control-flow.
static void AddFunctionToModule(llvm::Module *module, const remill::Arch *arch,
const test::TestInfo &test) {
DLOG(INFO) << "Adding block for: " << test.test_name;

std::stringstream ss;
ss << SYMBOL_PREFIX << test.test_name << "_lifted";

auto func = remill::DeclareLiftedFunction(module, ss.str());
remill::CloneBlockFunctionInto(func);

func->setLinkage(llvm::GlobalValue::ExternalLinkage);
func->setVisibility(llvm::GlobalValue::DefaultVisibility);
class TestTraceManager : public remill::TraceManager {
public:
virtual ~TestTraceManager(void) = default;

remill::IntrinsicTable intrinsics(module);
remill::InstructionLifter lifter(arch, &intrinsics);

std::map<uint64_t, remill::Instruction> inst;
std::map<uint64_t, llvm::BasicBlock *> blocks;
// Remember `lifted_func` as the lifted trace for the code at `addr`. Any
// function previously recorded for the same address is replaced.
void SetLiftedTraceDefinition(uint64_t addr,
                              llvm::Function *lifted_func) override {
  auto &slot = traces[addr];
  slot = lifted_func;
}

// Function that will create basic blocks as needed.
auto GetOrCreateBlock = [func, &blocks](uint64_t block_pc) {
auto &block = blocks[block_pc];
if (!block) {
block = llvm::BasicBlock::Create(func->getContext(), "", func);
}
return block;
};

std::stringstream seen_insts;
const char *sep = "";

auto entry_block = GetOrCreateBlock(test.test_begin);
llvm::BranchInst::Create(entry_block, &(func->front()));

auto saw_isel = false;
auto addr = test.test_begin;
while (addr < test.test_end) {
std::string inst_bytes;
auto bytes = reinterpret_cast<const char *>(addr);
inst_bytes.insert(inst_bytes.end(), bytes, bytes + 4);

remill::Instruction inst;
CHECK(arch->DecodeInstruction(addr, inst_bytes, inst))
<< "Can't decode test instruction " << inst.Serialize() << " in "
<< test.test_name;

seen_insts << sep << inst.function;
sep = ", ";

LOG(INFO) << "Lifting " << inst.Serialize();

auto block = GetOrCreateBlock(inst.pc);
CHECK(remill::kLiftedInstruction == lifter.LiftIntoBlock(inst, block))
<< "Can't lift test instruction " << inst.Serialize() << " in "
<< test.test_name;

saw_isel = saw_isel || inst.function == test.isel_name;
addr += inst.NumBytes();

// Connect together the basic blocks.
switch (inst.category) {
case remill::Instruction::kCategoryNormal:
case remill::Instruction::kCategoryNoOp:
llvm::BranchInst::Create(GetOrCreateBlock(inst.next_pc), block);
break;

case remill::Instruction::kCategoryDirectJump:
case remill::Instruction::kCategoryDirectFunctionCall:
llvm::BranchInst::Create(GetOrCreateBlock(inst.branch_taken_pc), block);
break;

case remill::Instruction::kCategoryConditionalBranch:
llvm::BranchInst::Create(GetOrCreateBlock(inst.branch_taken_pc),
GetOrCreateBlock(inst.branch_not_taken_pc),
remill::LoadBranchTaken(block), block);
break;

default:
remill::AddTerminatingTailCall(block, intrinsics.missing_block);
break;
// Look up the function recorded for the trace at `addr`. Returns `nullptr`
// when no trace has been recorded for that address.
llvm::Function *GetLiftedTraceDeclaration(uint64_t addr) override {
  const auto found = traces.find(addr);
  return found == traces.end() ? nullptr : found->second;
}

CHECK(saw_isel) << "Test " << test.test_name
<< " does not have an instruction that "
<< "uses the semantics function " << test.isel_name
<< ", saw " << seen_insts.str();
// Return the function recorded for the trace at `addr`. This simply forwards
// to `GetLiftedTraceDeclaration`, so declarations and definitions are looked
// up the same way and the result is whatever that call returns.
llvm::Function *GetLiftedTraceDefinition(uint64_t addr) override {
return GetLiftedTraceDeclaration(addr);
}

// Terminate any stragglers.
for (auto pc_to_block : blocks) {
auto block = pc_to_block.second;
if (!block->getTerminator()) {
remill::AddTerminatingTailCall(block, intrinsics.missing_block);
// Fetch the byte stored in `memory` for `addr`. On success the byte is
// written through `byte` and `true` is returned; if `addr` has no entry,
// `*byte` is left untouched and `false` is returned.
bool TryReadExecutableByte(uint64_t addr, uint8_t *byte) override {
  const auto entry = memory.find(addr);
  if (entry == memory.end()) {
    return false;
  }
  *byte = entry->second;
  return true;
}
}

public:
std::unordered_map<uint64_t, uint8_t> memory;
std::unordered_map<uint64_t, llvm::Function *> traces;
};

} // namespace

extern "C" int main(int argc, char *argv[]) {

google::ParseCommandLineFlags(&argc, &argv, true);
google::InitGoogleLogging(argv[0]);

auto os = remill::GetOSName(REMILL_OS);
auto context = new llvm::LLVMContext;

auto arch = remill::Arch::Get(*context, os, remill::kArchAArch64LittleEndian);

DLOG(INFO) << "Generating tests.";

auto bc_file = remill::FindSemanticsBitcodeFile(FLAGS_arch);
auto module = remill::LoadModuleFromFile(context, bc_file);
remill::GetHostArch(*context)->PrepareModule(module.get());

std::vector<const test::TestInfo *> tests;
for (auto i = 0U;; ++i) {
const auto &test = test::__aarch64_test_table_begin[i];
if (&test >= &(test::__aarch64_test_table_end[0]))
if (&test >= &(test::__aarch64_test_table_end[0])) {
break;
AddFunctionToModule(module.get(), arch, test);
}
tests.push_back(&test);
}

TestTraceManager manager;

// Add all code bytes from the test cases to the memory.
for (auto test : tests) {
for (auto addr = test->test_begin; addr < test->test_end; ++addr) {
manager.memory[addr] = *reinterpret_cast<uint8_t *>(addr);
}
}

llvm::LLVMContext context;
auto os_name = remill::GetOSName(REMILL_OS);
auto arch_name = remill::GetArchName(FLAGS_arch);
auto arch = remill::Arch::Build(&context, os_name, arch_name);
auto module = remill::LoadArchSemantics(arch);

remill::IntrinsicTable intrinsics(module.get());
remill::InstructionLifter inst_lifter(arch, intrinsics);
remill::TraceLifter trace_lifter(inst_lifter, manager);

for (auto test : tests) {
if (!trace_lifter.Lift(test->test_begin)) {
LOG(ERROR) << "Unable to lift test " << test->test_name;
continue;
}

// Make sure the trace for the test has the right name.
std::stringstream ss;
ss << SYMBOL_PREFIX << test->test_name << "_lifted";

auto lifted_trace = manager.GetLiftedTraceDefinition(test->test_begin);
lifted_trace->setName(ss.str());
}

DLOG(INFO) << "Serializing bitcode to " << FLAGS_bc_out;
remill::StoreModuleToFile(module, FLAGS_bc_out);
auto host_arch = remill::Arch::Build(
&context, os_name, remill::GetArchName(REMILL_ARCH));
host_arch->PrepareModule(module.get());
remill::StoreModuleToFile(module.get(), FLAGS_bc_out);

DLOG(INFO) << "Done.";
return 0;
Expand Down
8 changes: 8 additions & 0 deletions tests/AArch64/Run.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ MAKE_RW_MEMORY(64)

MAKE_RW_FP_MEMORY(32)
MAKE_RW_FP_MEMORY(64)
MAKE_RW_FP_MEMORY(128)

NEVER_INLINE float64_t __remill_read_memory_f80(Memory *, addr_t) {
abort();
Expand Down Expand Up @@ -247,6 +248,13 @@ Memory *__remill_atomic_end(Memory *) {
return nullptr;
}

// Definition of `__remill_delay_slot_begin` for this test runner. It ignores
// its `Memory *` argument and always returns `nullptr`.
// NOTE(review): presumably only here to satisfy the linker, since delay
// slots are not expected in AArch64 tests -- confirm.
Memory *__remill_delay_slot_begin(Memory *) {
return nullptr;
}
// Definition of `__remill_delay_slot_end` for this test runner. It ignores
// its `Memory *` argument and always returns `nullptr`.
// NOTE(review): presumably only here to satisfy the linker, since delay
// slots are not expected in AArch64 tests -- confirm.
Memory *__remill_delay_slot_end(Memory *) {
return nullptr;
}

void __remill_defer_inlining(void) {}

Memory *__remill_error(AArch64State &, addr_t, Memory *) {
Expand Down

0 comments on commit d690125

Please sign in to comment.