Skip to content

Commit

Permalink
Find the symbols via DT_GNU_HASH instead of DT_HASH. (Snaipe#36)
Browse files Browse the repository at this point in the history
* Switch to CMake 3.5.

This avoids a warning when building with modern
CMake.

Signed-off-by: Chris Lalancette <clalancette@gmail.com>

* Find the symbols via DT_GNU_HASH instead of DT_HASH.

Since glibc 2.36 (released in August 2022), builds of libc.so.6
are built with the default value of --hash-style on all platforms.
The immediate effect of this is that the linker no longer generates
a DT_HASH section, which is what Mimick uses to detect vital
functions like vfprintf and abort.

It turns out that Ubuntu and Debian specifically override this
behavior on amd64 and i386, since there are some proprietary
applications on those platforms that depend on this.  However,
this override is *not* applied on aarch64, so there is no DT_HASH.
This explains the discrepancy we see when running CI on amd64
(where Mimick tests succeed) and aarch64 (where Mimick tests fail).

It also turns out that DT_HASH is "deprecated", and has been for
about 15 years.  Thus, all of our platforms (going back to RHEL-8)
support its replacement, DT_GNU_HASH.

Thus, this commit implements getting symbols from DT_GNU_HASH instead
of from DT_HASH.  Note that it also changes it so that we *prefer*
to get the data from DT_GNU_HASH, as someday DT_HASH may go away
entirely.

I should note that I borrowed heavily from
https://flapenguin.me/elf-dt-gnu-hash and
https://sourceware.org/git/?p=glibc.git;a=blob;f=elf/dl-lookup.c;h=3d2369dbf2b7ca219eaf80a820e2a8e1329fbf50;hb=HEAD#l350
to implement this, though I made a bunch of changes to fix
warnings and better integrate into the Mimick source code.

Signed-off-by: Chris Lalancette <clalancette@gmail.com>
  • Loading branch information
clalancette authored May 28, 2024
1 parent 9af748b commit 4dbd495
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 6 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Redistribution and use of this file is allowed according to the terms of the MIT license.
# For details see the LICENSE file distributed with Mimick.

cmake_minimum_required (VERSION 2.8.12)
cmake_minimum_required (VERSION 3.5)

project (Mimick C CXX)
# Default to C++11
Expand Down
87 changes: 82 additions & 5 deletions src/plt-elf.c
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,16 @@ void plt_reset_offsets(plt_offset *offset, size_t nb_off)
}
}

/*
 * Compute the GNU-style (DT_GNU_HASH) hash of a NUL-terminated symbol name.
 *
 * The hash starts at 5381 and is multiplied by 33 before each byte is added.
 * Every byte is folded in as an unsigned char, so bytes >= 0x80 produce the
 * same result whether plain `char` is signed or unsigned on the target.
 */
static uint32_t elf_gnu_hash(const char *name)
{
    uint32_t h = 5381;

    for (unsigned char c = *name; c != '\0'; c = *++name) {
        /* h * 33 + c; add the unsigned byte, not the possibly-signed *name. */
        h = (h << 5) + h + c;
    }

    return h;
}

static unsigned long elf_hash (const char *s)
{
unsigned long h = 0, high;
Expand All @@ -329,6 +339,54 @@ static unsigned long elf_hash (const char *s)
return h;
}

/*
 * Look up `name` in a DT_GNU_HASH table.
 *
 * gnu_hash points at the table header: four 32-bit words (bucket count,
 * index of the first hashed symbol, number of bloom filter words, bloom
 * shift), followed by the bloom filter words, the bucket array and the
 * chain array. symtab and strtab are the symbol and string tables the
 * hash table indexes into.
 *
 * Returns a pointer to the matching entry in symtab, or NULL when the name
 * is not present in the table.
 */
static ElfW(Sym) *elf_gnu_hash_find(const ElfW(Word) *gnu_hash, ElfW(Sym) *symtab,
        const char *strtab, const char *name)
{
    const uint32_t namehash = elf_gnu_hash(name);

    const uint32_t nbuckets = gnu_hash[0];
    const uint32_t symoffset = gnu_hash[1];
    const uint32_t bloom_size = gnu_hash[2];
    const uint32_t bloom_shift = gnu_hash[3];

    /* Both counts are used as modulo divisors below; a table with either
     * one at zero cannot contain any symbol. */
    if (nbuckets == 0 || bloom_size == 0)
        return NULL;

    const ElfWord *bloom = (const void *) &gnu_hash[4];
    const uint32_t *buckets = (const void *) &bloom[bloom_size];
    const uint32_t *chain = &buckets[nbuckets];

    ElfWord word = bloom[(namehash / MMK_BITS) % bloom_size];
    ElfWord mask = 0
        | (ElfWord) 1 << (namehash % MMK_BITS)
        | (ElfWord) 1 << ((namehash >> bloom_shift) % MMK_BITS);

    /* If at least one bit is not set, the symbol is surely missing. */
    if ((word & mask) != mask)
        return NULL;

    /* A bucket value below symoffset is treated as an empty bucket. */
    uint32_t symix = buckets[namehash % nbuckets];
    if (symix < symoffset)
        return NULL;

    const size_t name_len = mmk_strlen(name);

    /* Walk the chain belonging to this bucket. */
    for (;; symix++) {
        const uint32_t hash = chain[symix - symoffset];

        /* Bit 0 of a chain entry is the end-of-chain flag, so compare the
         * hashes with that bit forced on in both. */
        if ((namehash | 1) == (hash | 1)) {
            const char *symname = strtab + symtab[symix].st_name;

            /* Require an exact match: equal length and equal bytes. A
             * comparison over the shorter of the two lengths would accept
             * prefixes (and empty names) on a hash collision. */
            if (mmk_strlen(symname) == name_len
                    && mmk_memcmp(name, symname, name_len) == 0)
                return &symtab[symix];
        }

        /* Chain ends with an element with the lowest bit set to 1. */
        if (hash & 1)
            break;
    }

    return NULL;
}

static ElfW(Sym) *elf_hash_find(ElfW(Word) *hash, ElfW(Sym) *symtab,
const char *strtab, const char *name)
{
Expand All @@ -351,13 +409,32 @@ static ElfW(Sym) *elf_hash_find(ElfW(Word) *hash, ElfW(Sym) *symtab,

/*
 * Find the dynamic symbol called `name` in `lib`.
 *
 * The symbol and string tables are fetched via lib_dt_lookup() with
 * DT_SYMTAB and DT_STRTAB; if either lookup yields NULL the search is
 * abandoned. The DT_GNU_HASH table is tried first, and DT_HASH is only
 * consulted when DT_GNU_HASH is absent or does not contain the name.
 *
 * Returns the matching symbol table entry, or NULL if it was not found.
 */
static ElfW(Sym) *sym_lookup_dyn(plt_lib lib, const char *name)
{
    ElfW(Sym) *symtab = (ElfW(Sym)*) lib_dt_lookup(lib, DT_SYMTAB);
    if (!symtab)
        return NULL;

    const char *strtab = (const char*) lib_dt_lookup(lib, DT_STRTAB);
    if (!strtab)
        return NULL;

    ElfW(Sym) *symbol = NULL;

    // DT_GNU_HASH is the "modern" way to look up symbols.  If we have that,
    // use it.
    ElfW(Word) *gnu_hash = (ElfW(Word)*) lib_dt_lookup(lib, DT_GNU_HASH);
    if (gnu_hash) {
        symbol = elf_gnu_hash_find(gnu_hash, symtab, strtab, name);
        if (symbol)
            return symbol;
    }

    // DT_HASH is the older, deprecated way to find symbols.  Only
    // attempt to use it if we can't find the symbols via DT_GNU_HASH.
    ElfW(Word) *hash = (ElfW(Word)*) lib_dt_lookup(lib, DT_HASH);
    if (hash)
        symbol = elf_hash_find (hash, symtab, strtab, name);

    return symbol;
}

plt_fn *plt_get_real_fn(plt_ctx ctx, const char *name)
Expand Down

0 comments on commit 4dbd495

Please sign in to comment.