Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rewrite autoPatchelfHook in python #149731

Merged
merged 25 commits into from
Feb 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
66684c7
rewrite autoPatchelfHook in python
layus Dec 8, 2021
8ed4062
Update pkgs/build-support/setup-hooks/auto-patchelf.py
layus Dec 9, 2021
91f96ef
Update pkgs/build-support/setup-hooks/auto-patchelf.py
layus Dec 9, 2021
92b804a
Apply suggestions from code review
layus Dec 9, 2021
f130f13
Fix issues discovered during tests
layus Dec 10, 2021
002f231
Apply suggestions from code review
layus Dec 10, 2021
2335297
fixup line wrapping
layus Dec 10, 2021
03b9396
autoPatchelfHook: Improve compatibility with bash version
layus Dec 21, 2021
442b335
autoPatchelfHook: Fix symlink-reated issues
layus Dec 21, 2021
ba54671
autoPatchelfHook: Revert dubious patchelf invocation test
layus Dec 21, 2021
7d9990a
autoPatchelfHook: Untangle the executable detection logic
layus Dec 21, 2021
811e1a0
fixup! autoPatchelfHook: Untangle the executable detection logic
layus Dec 21, 2021
aa4524f
autoPatchelfHook: Fix invalid borrow issue
layus Dec 22, 2021
abf667e
autoPatchelfHook: Handle runtimeDependencies as the bare string it is
layus Dec 22, 2021
9418ae9
autoPatchelfHook: add bintools dependency
layus Dec 22, 2021
c76f36f
autoPatchelfHook: replace old hook with the rewrite
layus Dec 22, 2021
b758125
autoPatchelfHook: get rid of the old hook content
layus Dec 22, 2021
46e857e
autoPatchelfHook: fix wrong ordering of debug info
layus Dec 22, 2021
18837b0
autoPatchelfHook: persist extra search path across incovations
layus Dec 22, 2021
ebe8a3f
autoPatchelfHook: fix wrong usage of global variables
layus Dec 22, 2021
0b326f2
Update auto-patchelf.py
layus Dec 25, 2021
568ccfe
Apply suggestions from code review
layus Dec 25, 2021
16d661d
autoPatchelfHook: remove imprecise and incorrect warning
layus Dec 25, 2021
6ce3726
Apply explicit types from code review
layus Dec 27, 2021
95a04f7
Complement and polish types and snake_casing
layus Dec 27, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
329 changes: 329 additions & 0 deletions pkgs/build-support/setup-hooks/auto-patchelf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,329 @@
#!/usr/bin/env python3

from collections import defaultdict
from contextlib import contextmanager
from dataclasses import dataclass
from elftools.common.exceptions import ELFError # type: ignore
from elftools.elf.dynamic import DynamicSection # type: ignore
from elftools.elf.elffile import ELFFile # type: ignore
from elftools.elf.enums import ENUM_E_TYPE, ENUM_EI_OSABI # type: ignore
from itertools import chain
from pathlib import Path, PurePath

from typing import Tuple, Optional, Iterator, List, DefaultDict, Set

import argparse
import os
import pprint
import subprocess
import sys



@contextmanager
def open_elf(path: Path) -> Iterator[ELFFile]:
with path.open('rb') as stream:
yield ELFFile(stream)


def is_static_executable(elf: ELFFile) -> bool:
# Statically linked executables have an ELF type of EXEC but no INTERP.
return (elf.header["e_type"] == 'ET_EXEC'
and not elf.get_section_by_name(".interp"))


def is_dynamic_executable(elf: ELFFile) -> bool:
# We do not require an ELF type of EXEC. This also catches
# position-independent executables, as they typically have an INTERP
# section but their ELF type is DYN.
return bool(elf.get_section_by_name(".interp"))


def get_dependencies(elf: ELFFile) -> List[str]:
dependencies = []
# This convoluted code is here on purpose. For some reason, using
# elf.get_section_by_name(".dynamic") does not always return an
# instance of DynamicSection, but that is required to call iter_tags
for section in elf.iter_sections():
if isinstance(section, DynamicSection):
for tag in section.iter_tags('DT_NEEDED'):
dependencies.append(tag.needed)
break # There is only one dynamic section

return dependencies


def get_rpath(elf: ELFFile) -> List[str]:
# This convoluted code is here on purpose. For some reason, using
# elf.get_section_by_name(".dynamic") does not always return an
# instance of DynamicSection, but that is required to call iter_tags
for section in elf.iter_sections():
if isinstance(section, DynamicSection):
for tag in section.iter_tags('DT_RUNPATH'):
return tag.runpath.split(':')

for tag in section.iter_tags('DT_RPATH'):
return tag.rpath.split(':')

break # There is only one dynamic section

return []


def get_arch(elf: ELFFile) -> str:
return elf.get_machine_arch()


def get_osabi(elf: ELFFile) -> str:
return elf.header["e_ident"]["EI_OSABI"]


def osabi_are_compatible(wanted: str, got: str) -> bool:
"""
Tests whether two OS ABIs are compatible, taking into account the
generally accepted compatibility of SVR4 ABI with other ABIs.
"""
if not wanted or not got:
# One of the types couldn't be detected, so as a fallback we'll
# assume they're compatible.
return True

# Generally speaking, the base ABI (0x00), which is represented by
# readelf(1) as "UNIX - System V", indicates broad compatibility
# with other ABIs.
#
# TODO: This isn't always true. For example, some OSes embed ABI
# compatibility into SHT_NOTE sections like .note.tag and
# .note.ABI-tag. It would be prudent to add these to the detection
# logic to produce better ABI information.
if wanted == 'ELFOSABI_SYSV':
return True

# Similarly here, we should be able to link against a superset of
# features, so even if the target has another ABI, this should be
# fine.
if got == 'ELFOSABI_SYSV':
return True

# Otherwise, we simply return whether the ABIs are identical.
return wanted == got


def glob(path: Path, pattern: str, recursive: bool) -> Iterator[Path]:
return path.rglob(pattern) if recursive else path.glob(pattern)


cached_paths: Set[Path] = set()
soname_cache: DefaultDict[Tuple[str, str], List[Tuple[Path, str]]] = defaultdict(list)


def populate_cache(initial: List[Path], recursive: bool =False) -> None:
lib_dirs = list(initial)

while lib_dirs:
lib_dir = lib_dirs.pop(0)

if lib_dir in cached_paths:
continue

cached_paths.add(lib_dir)

for path in glob(lib_dir, "*.so*", recursive):
if not path.is_file():
continue

resolved = path.resolve()
try:
with open_elf(path) as elf:
osabi = get_osabi(elf)
arch = get_arch(elf)
rpath = [Path(p) for p in get_rpath(elf)
if p and '$ORIGIN' not in p]
lib_dirs += rpath
soname_cache[(path.name, arch)].append((resolved.parent, osabi))

except ELFError:
# Not an ELF file in the right format
pass


def find_dependency(soname: str, soarch: str, soabi: str) -> Optional[Path]:
for lib, libabi in soname_cache[(soname, soarch)]:
if osabi_are_compatible(soabi, libabi):
return lib
return None


@dataclass
class Dependency:
file: Path # The file that contains the dependency
name: Path # The name of the dependency
found: bool = False # Whether it was found somewhere


def auto_patchelf_file(path: Path, runtime_deps: list[Path]) -> list[Dependency]:
try:
with open_elf(path) as elf:

if is_static_executable(elf):
# No point patching these
print(f"skipping {path} because it is statically linked")
return []

if elf.num_segments() == 0:
# no segment (e.g. object file)
print(f"skipping {path} because it contains no segment")
return []

file_arch = get_arch(elf)
if interpreter_arch != file_arch:
# Our target architecture is different than this file's
# architecture, so skip it.
print(f"skipping {path} because its architecture ({file_arch})"
f" differs from target ({interpreter_arch})")
return []

file_osabi = get_osabi(elf)
if not osabi_are_compatible(interpreter_osabi, file_osabi):
print(f"skipping {path} because its OS ABI ({file_osabi}) is"
f" not compatible with target ({interpreter_osabi})")
return []

file_is_dynamic_executable = is_dynamic_executable(elf)

file_dependencies = map(Path, get_dependencies(elf))

except ELFError:
return []

rpath = []
if file_is_dynamic_executable:
print("setting interpreter of", path)
subprocess.run(
["patchelf", "--set-interpreter", interpreter_path.as_posix(), path.as_posix()],
check=True)
rpath += runtime_deps

print("searching for dependencies of", path)
dependencies = []
# Be sure to get the output of all missing dependencies instead of
# failing at the first one, because it's more useful when working
# on a new package where you don't yet know the dependencies.
for dep in file_dependencies:
if dep.is_absolute() and dep.is_file():
# This is an absolute path. If it exists, just use it.
# Otherwise, we probably want this to produce an error when
# checked (because just updating the rpath won't satisfy
# it).
continue
elif (libc_lib / dep).is_file():
# This library exists in libc, and will be correctly
# resolved by the linker.
continue

if found_dependency := find_dependency(dep.name, file_arch, file_osabi):
rpath.append(found_dependency)
dependencies.append(Dependency(path, dep, True))
print(f" {dep} -> found: {found_dependency}")
else:
dependencies.append(Dependency(path, dep, False))
print(f" {dep} -> not found!")

# Dedup the rpath
rpath_str = ":".join(dict.fromkeys(map(Path.as_posix, rpath)))

if rpath:
print("setting RPATH to:", rpath_str)
subprocess.run(
["patchelf", "--set-rpath", rpath_str, path.as_posix()],
check=True)

return dependencies


def auto_patchelf(
paths_to_patch: List[Path],
lib_dirs: List[Path],
runtime_deps: List[Path],
recursive: bool =True,
ignore_missing: bool =False) -> None:

if not paths_to_patch:
sys.exit("No paths to patch, stopping.")

# Add all shared objects of the current output path to the cache,
# before lib_dirs, so that they are chosen first in find_dependency.
populate_cache(paths_to_patch, recursive)
populate_cache(lib_dirs)

dependencies = []
for path in chain.from_iterable(glob(p, '*', recursive) for p in paths_to_patch):
if not path.is_symlink() and path.is_file():
dependencies += auto_patchelf_file(path, runtime_deps)

missing = [dep for dep in dependencies if not dep.found]

# Print a summary of the missing dependencies at the end
for dep in missing:
print(f"auto-patchelf could not satisfy dependency {dep.name} wanted by {dep.file}")

if missing and not ignore_missing:
sys.exit('auto-patchelf failed to find all the required dependencies.\n'
'Add the missing dependencies to --libs or use --ignore-missing.')


def main() -> None:
parser = argparse.ArgumentParser(
prog="auto-patchelf",
description='auto-patchelf tries as hard as possible to patch the'
' provided binary files by looking for compatible'
'libraries in the provided paths.')
parser.add_argument(
"--ignore-missing",
action="store_true",
help="Do not fail when some dependencies are not found.")
parser.add_argument(
"--no-recurse",
dest="recursive",
action="store_false",
help="Patch only the provided paths, and ignore their children")
parser.add_argument(
"--paths", nargs="*", type=Path,
help="Paths whose content needs to be patched.")
parser.add_argument(
"--libs", nargs="*", type=Path,
help="Paths where libraries are searched for.")
parser.add_argument(
"--runtime-dependencies", nargs="*", type=Path,
help="Paths to prepend to the runtime path of executable binaries.")

print("automatically fixing dependencies for ELF files")
args = parser.parse_args()
pprint.pprint(vars(args))

auto_patchelf(
args.paths,
args.libs,
args.runtime_dependencies,
args.recursive,
args.ignore_missing)


interpreter_path: Path = None # type: ignore
interpreter_osabi: str = None # type: ignore
interpreter_arch: str = None # type: ignore
libc_lib: Path = None # type: ignore

if __name__ == "__main__":
nix_support = Path(os.environ['NIX_BINTOOLS']) / 'nix-support'
interpreter_path = Path((nix_support / 'dynamic-linker').read_text().strip())
libc_lib = Path((nix_support / 'orig-libc').read_text().strip()) / 'lib'

with open_elf(interpreter_path) as interpreter:
interpreter_osabi = get_osabi(interpreter)
interpreter_arch = get_arch(interpreter)

if interpreter_arch and interpreter_osabi and interpreter_path and libc_lib:
main()
else:
sys.exit("Failed to parse dynamic linker (ld) properties.")
Loading