diff --git a/pkgs/build-support/setup-hooks/auto-patchelf.py b/pkgs/build-support/setup-hooks/auto-patchelf.py
new file mode 100644
index 0000000000000..26fd623e3da08
--- /dev/null
+++ b/pkgs/build-support/setup-hooks/auto-patchelf.py
@@ -0,0 +1,349 @@
+#!/usr/bin/env python3
+
+from collections import defaultdict
+from contextlib import contextmanager
+from dataclasses import dataclass
+from elftools.common.exceptions import ELFError  # type: ignore
+from elftools.elf.dynamic import DynamicSection  # type: ignore
+from elftools.elf.elffile import ELFFile  # type: ignore
+from elftools.elf.enums import ENUM_E_TYPE, ENUM_EI_OSABI  # type: ignore
+from itertools import chain
+from pathlib import Path, PurePath
+
+from typing import Tuple, Optional, Iterator, List, DefaultDict, Set
+
+import argparse
+import os
+import pprint
+import subprocess
+import sys
+
+
+
+@contextmanager
+def open_elf(path: Path) -> Iterator[ELFFile]:
+    """Opens path in binary mode and yields it parsed as an ELFFile."""
+    with path.open('rb') as stream:
+        yield ELFFile(stream)
+
+
+def is_static_executable(elf: ELFFile) -> bool:
+    # Statically linked executables have an ELF type of EXEC but no INTERP.
+    return (elf.header["e_type"] == 'ET_EXEC'
+            and not elf.get_section_by_name(".interp"))
+
+
+def is_dynamic_executable(elf: ELFFile) -> bool:
+    # We do not require an ELF type of EXEC. This also catches
+    # position-independent executables, as they typically have an INTERP
+    # section but their ELF type is DYN.
+    return bool(elf.get_section_by_name(".interp"))
+
+
+def get_dependencies(elf: ELFFile) -> List[str]:
+    """Returns the DT_NEEDED entries of the first dynamic section."""
+    dependencies = []
+    # This convoluted code is here on purpose. For some reason, using
+    # elf.get_section_by_name(".dynamic") does not always return an
+    # instance of DynamicSection, but that is required to call iter_tags
+    for section in elf.iter_sections():
+        if isinstance(section, DynamicSection):
+            for tag in section.iter_tags('DT_NEEDED'):
+                dependencies.append(tag.needed)
+            break  # There is only one dynamic section
+
+    return dependencies
+
+
+def get_rpath(elf: ELFFile) -> List[str]:
+    """Returns the split DT_RUNPATH, else the split DT_RPATH, else []."""
+    # This convoluted code is here on purpose. For some reason, using
+    # elf.get_section_by_name(".dynamic") does not always return an
+    # instance of DynamicSection, but that is required to call iter_tags
+    for section in elf.iter_sections():
+        if isinstance(section, DynamicSection):
+            for tag in section.iter_tags('DT_RUNPATH'):
+                return tag.runpath.split(':')
+
+            for tag in section.iter_tags('DT_RPATH'):
+                return tag.rpath.split(':')
+
+            break  # There is only one dynamic section
+
+    return []
+
+
+def get_arch(elf: ELFFile) -> str:
+    return elf.get_machine_arch()
+
+
+def get_osabi(elf: ELFFile) -> str:
+    return elf.header["e_ident"]["EI_OSABI"]
+
+
+def osabi_are_compatible(wanted: str, got: str) -> bool:
+    """
+    Tests whether two OS ABIs are compatible, taking into account the
+    generally accepted compatibility of SVR4 ABI with other ABIs.
+    """
+    if not wanted or not got:
+        # One of the types couldn't be detected, so as a fallback we'll
+        # assume they're compatible.
+        return True
+
+    # Generally speaking, the base ABI (0x00), which is represented by
+    # readelf(1) as "UNIX - System V", indicates broad compatibility
+    # with other ABIs.
+    #
+    # TODO: This isn't always true. For example, some OSes embed ABI
+    # compatibility into SHT_NOTE sections like .note.tag and
+    # .note.ABI-tag. It would be prudent to add these to the detection
+    # logic to produce better ABI information.
+    if wanted == 'ELFOSABI_SYSV':
+        return True
+
+    # Similarly here, we should be able to link against a superset of
+    # features, so even if the target has another ABI, this should be
+    # fine.
+    if got == 'ELFOSABI_SYSV':
+        return True
+
+    # Otherwise, we simply return whether the ABIs are identical.
+    return wanted == got
+
+
+def glob(path: Path, pattern: str, recursive: bool) -> Iterator[Path]:
+    return path.rglob(pattern) if recursive else path.glob(pattern)
+
+
+cached_paths: Set[Path] = set()
+soname_cache: DefaultDict[Tuple[str, str], List[Tuple[Path, str]]] = defaultdict(list)
+
+
+def populate_cache(initial: List[Path], recursive: bool = False) -> None:
+    """
+    Records every "*.so*" ELF file found in the given directories in
+    soname_cache, and also visits the RUNPATH/RPATH directories of those
+    files (entries that are empty or contain $ORIGIN are ignored).
+    """
+    lib_dirs = list(initial)
+
+    while lib_dirs:
+        lib_dir = lib_dirs.pop(0)
+
+        if lib_dir in cached_paths:
+            continue
+
+        cached_paths.add(lib_dir)
+
+        for path in glob(lib_dir, "*.so*", recursive):
+            if not path.is_file():
+                continue
+
+            resolved = path.resolve()
+            try:
+                with open_elf(path) as elf:
+                    osabi = get_osabi(elf)
+                    arch = get_arch(elf)
+                    rpath = [Path(p) for p in get_rpath(elf)
+                             if p and '$ORIGIN' not in p]
+                    lib_dirs += rpath
+                    soname_cache[(path.name, arch)].append((resolved.parent, osabi))
+
+            except ELFError:
+                # Not an ELF file in the right format
+                pass
+
+
+def find_dependency(soname: str, soarch: str, soabi: str) -> Optional[Path]:
+    """Returns the directory of the first ABI-compatible cached library."""
+    for lib, libabi in soname_cache[(soname, soarch)]:
+        if osabi_are_compatible(soabi, libabi):
+            return lib
+    return None
+
+
+@dataclass
+class Dependency:
+    file: Path              # The file that contains the dependency
+    name: Path              # The name of the dependency
+    found: bool = False     # Whether it was found somewhere
+
+
+def auto_patchelf_file(path: Path, runtime_deps: List[Path]) -> List[Dependency]:
+    """
+    Sets the interpreter (for dynamic executables) and the RPATH of the
+    ELF file at path. Returns one Dependency per needed library that
+    had to be searched for in the cache; skipped files yield [].
+    """
+    try:
+        with open_elf(path) as elf:
+
+            if is_static_executable(elf):
+                # No point patching these
+                print(f"skipping {path} because it is statically linked")
+                return []
+
+            if elf.num_segments() == 0:
+                # no segment (e.g. object file)
+                print(f"skipping {path} because it contains no segment")
+                return []
+
+            file_arch = get_arch(elf)
+            if interpreter_arch != file_arch:
+                # Our target architecture is different than this file's
+                # architecture, so skip it.
+                print(f"skipping {path} because its architecture ({file_arch})"
+                      f" differs from target ({interpreter_arch})")
+                return []
+
+            file_osabi = get_osabi(elf)
+            if not osabi_are_compatible(interpreter_osabi, file_osabi):
+                print(f"skipping {path} because its OS ABI ({file_osabi}) is"
+                      f" not compatible with target ({interpreter_osabi})")
+                return []
+
+            file_is_dynamic_executable = is_dynamic_executable(elf)
+
+            file_dependencies = map(Path, get_dependencies(elf))
+
+    except ELFError:
+        return []
+
+    rpath = []
+    if file_is_dynamic_executable:
+        print("setting interpreter of", path)
+        subprocess.run(
+                ["patchelf", "--set-interpreter", interpreter_path.as_posix(), path.as_posix()],
+                check=True)
+        rpath += runtime_deps
+
+    print("searching for dependencies of", path)
+    dependencies = []
+    # Be sure to get the output of all missing dependencies instead of
+    # failing at the first one, because it's more useful when working
+    # on a new package where you don't yet know the dependencies.
+    for dep in file_dependencies:
+        if dep.is_absolute() and dep.is_file():
+            # This is an absolute path. If it exists, just use it.
+            # Otherwise, we probably want this to produce an error when
+            # checked (because just updating the rpath won't satisfy
+            # it).
+            continue
+        elif (libc_lib / dep).is_file():
+            # This library exists in libc, and will be correctly
+            # resolved by the linker.
+            continue
+
+        if found_dependency := find_dependency(dep.name, file_arch, file_osabi):
+            rpath.append(found_dependency)
+            dependencies.append(Dependency(path, dep, True))
+            print(f" {dep} -> found: {found_dependency}")
+        else:
+            dependencies.append(Dependency(path, dep, False))
+            print(f" {dep} -> not found!")
+
+    # Dedup the rpath
+    rpath_str = ":".join(dict.fromkeys(map(Path.as_posix, rpath)))
+
+    if rpath:
+        print("setting RPATH to:", rpath_str)
+        subprocess.run(
+                ["patchelf", "--set-rpath", rpath_str, path.as_posix()],
+                check=True)
+
+    return dependencies
+
+
+def auto_patchelf(
+        paths_to_patch: List[Path],
+        lib_dirs: List[Path],
+        runtime_deps: List[Path],
+        recursive: bool = True,
+        ignore_missing: bool = False) -> None:
+    """
+    Patches every regular, non-symlink file found in paths_to_patch and
+    exits with an error message when a dependency could not be found,
+    unless ignore_missing is set.
+    """
+
+    if not paths_to_patch:
+        sys.exit("No paths to patch, stopping.")
+
+    # Add all shared objects of the current output path to the cache,
+    # before lib_dirs, so that they are chosen first in find_dependency.
+    populate_cache(paths_to_patch, recursive)
+    populate_cache(lib_dirs)
+
+    dependencies = []
+    for path in chain.from_iterable(glob(p, '*', recursive) for p in paths_to_patch):
+        if not path.is_symlink() and path.is_file():
+            dependencies += auto_patchelf_file(path, runtime_deps)
+
+    missing = [dep for dep in dependencies if not dep.found]
+
+    # Print a summary of the missing dependencies at the end
+    for dep in missing:
+        print(f"auto-patchelf could not satisfy dependency {dep.name} wanted by {dep.file}")
+
+    if missing and not ignore_missing:
+        sys.exit('auto-patchelf failed to find all the required dependencies.\n'
+                 'Add the missing dependencies to --libs or use --ignore-missing.')
+
+
+def main() -> None:
+    """Parses the command line and runs auto_patchelf accordingly."""
+    parser = argparse.ArgumentParser(
+        prog="auto-patchelf",
+        description='auto-patchelf tries as hard as possible to patch the'
+                    ' provided binary files by looking for compatible '
+                    'libraries in the provided paths.')
+    parser.add_argument(
+        "--ignore-missing",
+        action="store_true",
+        help="Do not fail when some dependencies are not found.")
+    parser.add_argument(
+        "--no-recurse",
+        dest="recursive",
+        action="store_false",
+        help="Patch only the provided paths, and ignore their children")
+    parser.add_argument(
+        "--paths", nargs="*", type=Path,
+        help="Paths whose content needs to be patched.")
+    parser.add_argument(
+        "--libs", nargs="*", type=Path, default=[],
+        help="Paths where libraries are searched for.")
+    parser.add_argument(
+        "--runtime-dependencies", nargs="*", type=Path, default=[],
+        help="Paths to prepend to the runtime path of executable binaries.")
+
+    print("automatically fixing dependencies for ELF files")
+    args = parser.parse_args()
+    pprint.pprint(vars(args))
+
+    auto_patchelf(
+        args.paths,
+        args.libs,
+        args.runtime_dependencies,
+        args.recursive,
+        args.ignore_missing)
+
+
+interpreter_path: Path = None  # type: ignore
+interpreter_osabi: str = None  # type: ignore
+interpreter_arch: str = None  # type: ignore
+libc_lib: Path = None  # type: ignore
+
+if __name__ == "__main__":
+    nix_support = Path(os.environ['NIX_BINTOOLS']) / 'nix-support'
+    interpreter_path = Path((nix_support / 'dynamic-linker').read_text().strip())
+    libc_lib = Path((nix_support / 'orig-libc').read_text().strip()) / 'lib'
+
+    with open_elf(interpreter_path) as interpreter:
+        interpreter_osabi = get_osabi(interpreter)
+        interpreter_arch = get_arch(interpreter)
+
+    if interpreter_arch and interpreter_osabi and interpreter_path and libc_lib:
+        main()
+    else:
+        sys.exit("Failed to parse dynamic linker (ld) properties.")
diff --git a/pkgs/build-support/setup-hooks/auto-patchelf.sh b/pkgs/build-support/setup-hooks/auto-patchelf.sh index 4b3a1c5c39092..9822674196ae0 100644 --- a/pkgs/build-support/setup-hooks/auto-patchelf.sh +++ b/pkgs/build-support/setup-hooks/auto-patchelf.sh @@ -1,289 +1,21 @@ #!/usr/bin/env bash declare -a autoPatchelfLibs -declare -Ag autoPatchelfFailedDeps +declare -a extraAutoPatchelfLibs gatherLibraries() { autoPatchelfLibs+=("$1/lib") } -# wrapper around patchelf to raise proper 
error messages -# containing the tried file name and command -runPatchelf() { - patchelf "$@" || (echo "Command failed: patchelf $*" && exit 1) -} - # shellcheck disable=SC2154 # (targetOffset is referenced but not assigned.) addEnvHooks "$targetOffset" gatherLibraries -isExecutable() { - # For dynamically linked ELF files it would be enough to check just for the - # INTERP section. However, we won't catch statically linked executables as - # they only have an ELF type of EXEC but no INTERP. - # - # So what we do here is just check whether *either* the ELF type is EXEC - # *or* there is an INTERP section. This also catches position-independent - # executables, as they typically have an INTERP section but their ELF type - # is DYN. - isExeResult="$(LANG=C $READELF -h -l "$1" 2> /dev/null \ - | grep '^ *Type: *EXEC\>\|^ *INTERP\>')" - # not using grep -q, because it can cause Broken pipe - # https://unix.stackexchange.com/questions/305547/broken-pipe-when-grepping-output-but-only-with-i-flag - [ -n "$isExeResult" ] -} - -# We cache dependencies so that we don't need to search through all of them on -# every consecutive call to findDependency. -declare -Ag autoPatchelfCachedDepsAssoc -declare -ag autoPatchelfCachedDeps - -addToDepCache() { - if [[ ${autoPatchelfCachedDepsAssoc[$1]+f} ]]; then return; fi - - # store deps in an assoc. array for efficient lookups - # otherwise findDependency would have quadratic complexity - autoPatchelfCachedDepsAssoc["$1"]="" - - # also store deps in normal array to maintain their order - autoPatchelfCachedDeps+=("$1") -} - -declare -gi depCacheInitialised=0 -declare -gi doneRecursiveSearch=0 -declare -g foundDependency - -getDepsFromElfBinary() { - # NOTE: This does not use runPatchelf because it may encounter non-ELF - # files. Caller is expected to check the return code if needed. 
- patchelf --print-needed "$1" 2> /dev/null -} - -getRpathFromElfBinary() { - # NOTE: This does not use runPatchelf because it may encounter non-ELF - # files. Caller is expected to check the return code if needed. - local rpath - IFS=':' read -ra rpath < <(patchelf --print-rpath "$1" 2> /dev/null) || return $? - - printf "%s\n" "${rpath[@]}" -} - -populateCacheForDep() { - local so="$1" - local rpath found - rpath="$(getRpathFromElfBinary "$so")" || return 1 - - for found in $(getDepsFromElfBinary "$so"); do - local rpathElem - for rpathElem in $rpath; do - # Ignore empty element or $ORIGIN magic variable which should be - # deterministically resolved by adding this package's library - # files early anyway. - # - # shellcheck disable=SC2016 - # (Expressions don't expand in single quotes, use double quotes for - # that.) - if [[ -z "$rpathElem" || "$rpathElem" == *'$ORIGIN'* ]]; then - continue - fi - - local soname="${found%.so*}" - local foundso= - for foundso in "$rpathElem/$soname".so*; do - addToDepCache "$foundso" - done - - # Found in this element of the rpath, no need to check others. - if [ -n "$foundso" ]; then - break - fi - done - done - - # Not found in any rpath element. - return 1 -} - -populateCacheWithRecursiveDeps() { - # Dependencies may add more to the end of this array, so we use a counter - # with while instead of a regular for loop here. - local -i i=0 - while [ $i -lt ${#autoPatchelfCachedDeps[@]} ]; do - populateCacheForDep "${autoPatchelfCachedDeps[$i]}" - i=$i+1 - done -} - -getBinArch() { - $OBJDUMP -f "$1" 2> /dev/null | sed -ne 's/^architecture: *\([^,]\+\).*/\1/p' -} - -# Returns the specific OS ABI for an ELF file in the format produced by -# readelf(1), like "UNIX - System V" or "UNIX - GNU". -getBinOsabi() { - $READELF -h "$1" 2> /dev/null | sed -ne 's/^[ \t]*OS\/ABI:[ \t]*\(.*\)/\1/p' -} - -# Tests whether two OS ABIs are compatible, taking into account the generally -# accepted compatibility of SVR4 ABI with other ABIs. 
-areBinOsabisCompatible() { - local wanted="$1" - local got="$2" - - if [[ -z "$wanted" || -z "$got" ]]; then - # One of the types couldn't be detected, so as a fallback we'll assume - # they're compatible. - return 0 - fi - - # Generally speaking, the base ABI (0x00), which is represented by - # readelf(1) as "UNIX - System V", indicates broad compatibility with other - # ABIs. - # - # TODO: This isn't always true. For example, some OSes embed ABI - # compatibility into SHT_NOTE sections like .note.tag and .note.ABI-tag. - # It would be prudent to add these to the detection logic to produce better - # ABI information. - if [[ "$wanted" == "UNIX - System V" ]]; then - return 0 - fi - - # Similarly here, we should be able to link against a superset of features, - # so even if the target has another ABI, this should be fine. - if [[ "$got" == "UNIX - System V" ]]; then - return 0 - fi - - # Otherwise, we simply return whether the ABIs are identical. - if [[ "$wanted" == "$got" ]]; then - return 0 - fi - - return 1 -} - -# NOTE: If you want to use this function outside of the autoPatchelf function, -# keep in mind that the dependency cache is only valid inside the subshell -# spawned by the autoPatchelf function, so invoking this directly will possibly -# rebuild the dependency cache. See the autoPatchelf function below for more -# information. 
-findDependency() { - local filename="$1" - local arch="$2" - local osabi="$3" - local lib dep - - if [ $depCacheInitialised -eq 0 ]; then - for lib in "${autoPatchelfLibs[@]}"; do - for so in "$lib/"*.so*; do addToDepCache "$so"; done - done - depCacheInitialised=1 - fi - - for dep in "${autoPatchelfCachedDeps[@]}"; do - if [ "$filename" = "${dep##*/}" ]; then - if [ "$(getBinArch "$dep")" = "$arch" ] && areBinOsabisCompatible "$osabi" "$(getBinOsabi "$dep")"; then - foundDependency="$dep" - return 0 - fi - fi - done - - # Populate the dependency cache with recursive dependencies *only* if we - # didn't find the right dependency so far and afterwards run findDependency - # again, but this time with $doneRecursiveSearch set to 1 so that it won't - # recurse again (and thus infinitely). - if [ $doneRecursiveSearch -eq 0 ]; then - populateCacheWithRecursiveDeps - doneRecursiveSearch=1 - findDependency "$filename" "$arch" || return 1 - return 0 - fi - return 1 -} - -autoPatchelfFile() { - local dep rpath="" toPatch="$1" - - local interpreter - interpreter="$(< "$NIX_BINTOOLS/nix-support/dynamic-linker")" - - local interpreterArch interpreterOsabi toPatchArch toPatchOsabi - interpreterArch="$(getBinArch "$interpreter")" - interpreterOsabi="$(getBinOsabi "$interpreter")" - toPatchArch="$(getBinArch "$toPatch")" - toPatchOsabi="$(getBinOsabi "$toPatch")" - - if [ "$interpreterArch" != "$toPatchArch" ]; then - # Our target architecture is different than this file's architecture, - # so skip it. - echo "skipping $toPatch because its architecture ($toPatchArch) differs from target ($interpreterArch)" >&2 - return 0 - elif ! 
areBinOsabisCompatible "$interpreterOsabi" "$toPatchOsabi"; then - echo "skipping $toPatch because its OS ABI ($toPatchOsabi) is not compatible with target ($interpreterOsabi)" >&2 - return 0 - fi - - if isExecutable "$toPatch"; then - runPatchelf --set-interpreter "$interpreter" "$toPatch" - # shellcheck disable=SC2154 - # (runtimeDependencies is referenced but not assigned.) - if [ -n "$runtimeDependencies" ]; then - for dep in $runtimeDependencies; do - rpath="$rpath${rpath:+:}$dep/lib" - done - fi - fi - - local libcLib - libcLib="$(< "$NIX_BINTOOLS/nix-support/orig-libc")/lib" - - echo "searching for dependencies of $toPatch" >&2 - - local missing - missing="$(getDepsFromElfBinary "$toPatch")" || return 0 - - # This ensures that we get the output of all missing dependencies instead - # of failing at the first one, because it's more useful when working on a - # new package where you don't yet know its dependencies. - - for dep in $missing; do - if [[ "$dep" == /* ]]; then - # This is an absolute path. If it exists, just use it. Otherwise, - # we probably want this to produce an error when checked (because - # just updating the rpath won't satisfy it). - if [ -f "$dep" ]; then - continue - fi - elif [ -f "$libcLib/$dep" ]; then - # This library exists in libc, and will be correctly resolved by - # the linker. - continue - fi - - echo -n " $dep -> " >&2 - if findDependency "$dep" "$toPatchArch" "$toPatchOsabi"; then - rpath="$rpath${rpath:+:}${foundDependency%/*}" - echo "found: $foundDependency" >&2 - else - echo "not found!" >&2 - autoPatchelfFailedDeps["$dep"]="$toPatch" - fi - done - - if [ -n "$rpath" ]; then - echo "setting RPATH to: $rpath" >&2 - runPatchelf --set-rpath "$rpath" "$toPatch" - fi -} - # Can be used to manually add additional directories with shared object files # to be included for the next autoPatchelf invocation. 
addAutoPatchelfSearchPath() { local -a findOpts=() - # XXX: Somewhat similar to the one in the autoPatchelf function, maybe make - # it DRY someday... while [ $# -gt 0 ]; do case "$1" in --) shift; break;; @@ -296,15 +28,19 @@ addAutoPatchelfSearchPath() { esac done - while IFS= read -r -d '' file; do - addToDepCache "$file" + local dir= + while IFS= read -r -d '' dir; do + extraAutoPatchelfLibs+=("$dir") done < <(find "$@" "${findOpts[@]}" \! -type d \ - \( -name '*.so' -o -name '*.so.*' \) -print0) + \( -name '*.so' -o -name '*.so.*' \) -print0 \ + | sed -z 's#/[^/]*$##' \ + | uniq -z + ) } + autoPatchelf() { local norecurse= - while [ $# -gt 0 ]; do case "$1" in --) shift; break;; @@ -317,47 +53,14 @@ autoPatchelf() { esac done - if [ $# -eq 0 ]; then - echo "autoPatchelf: No paths to patch specified." >&2 - return 1 - fi - - echo "automatically fixing dependencies for ELF files" >&2 - - # Add all shared objects of the current output path to the start of - # autoPatchelfCachedDeps so that it's chosen first in findDependency. - addAutoPatchelfSearchPath ${norecurse:+--no-recurse} -- "$@" - - while IFS= read -r -d $'\0' file; do - isELF "$file" || continue - segmentHeaders="$(LANG=C $READELF -l "$file")" - # Skip if the ELF file doesn't have segment headers (eg. object files). - # not using grep -q, because it can cause Broken pipe - grep -q '^Program Headers:' <<<"$segmentHeaders" || continue - if isExecutable "$file"; then - # Skip if the executable is statically linked. 
- grep -q "^ *INTERP\\>" <<<"$segmentHeaders" || continue - fi - # Jump file if patchelf is unable to parse it - # Some programs contain binary blobs for testing, - # which are identified as ELF but fail to be parsed by patchelf - patchelf "$file" || continue - autoPatchelfFile "$file" - done < <(find "$@" ${norecurse:+-maxdepth 1} -type f -print0) - - # fail if any dependencies were not found and - # autoPatchelfIgnoreMissingDeps is not set - local depsMissing=0 - for failedDep in "${!autoPatchelfFailedDeps[@]}"; do - echo "autoPatchelfHook could not satisfy dependency $failedDep wanted by ${autoPatchelfFailedDeps[$failedDep]}" - depsMissing=1 - done - # shellcheck disable=SC2154 - # (autoPatchelfIgnoreMissingDeps is referenced but not assigned.) - if [[ $depsMissing == 1 && -z "$autoPatchelfIgnoreMissingDeps" ]]; then - echo "Add the missing dependencies to the build inputs or set autoPatchelfIgnoreMissingDeps=true" - exit 1 - fi + local runtimeDependenciesArray=($runtimeDependencies) + @pythonInterpreter@ @autoPatchelfScript@ \ + ${norecurse:+--no-recurse} \ + ${autoPatchelfIgnoreMissingDeps:+--ignore-missing} \ + --paths "$@" \ + --libs "${autoPatchelfLibs[@]}" \ + "${extraAutoPatchelfLibs[@]}" \ + --runtime-dependencies "${runtimeDependenciesArray[@]/%//lib}" } # XXX: This should ultimately use fixupOutputHooks but we currently don't have diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index 7e5f890294904..72644d56e06dc 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -143,9 +143,14 @@ with pkgs; autorestic = callPackage ../tools/backup/autorestic { }; - autoPatchelfHook = makeSetupHook - { name = "auto-patchelf-hook"; deps = [ bintools ]; } - ../build-support/setup-hooks/auto-patchelf.sh; + autoPatchelfHook = makeSetupHook { + name = "auto-patchelf-hook"; + deps = [ bintools ]; + substitutions = { + pythonInterpreter = "${python3.withPackages (ps: [ ps.pyelftools ])}/bin/python"; + 
autoPatchelfScript = ../build-support/setup-hooks/auto-patchelf.py; + }; + } ../build-support/setup-hooks/auto-patchelf.sh; appflowy = callPackage ../applications/office/appflowy { };