Skip to content

Commit

Permalink
Auto merge of rust-lang#137426 - DianQK:link-used, r=<try>
Browse files Browse the repository at this point in the history
Link object files that use `#[used]`

By directly linking the object files that use `#[used]`, we ensure the linker can see them.

This approach allows `#[used]` to avoid modifying symbol visibility, preserving local symbols. A similar example in C would be:

```c
// foo.c
__attribute__((constructor)) static void foo() {}
// main.c
int main(void) { return 0; }
```

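If `foo.c` is placed in a static library, its object file will never be loaded, because nothing in `main.c` references a symbol it defines and a linker only extracts archive members that resolve undefined symbols. The constructor therefore never runs unless the entire static library is force-loaded with `--whole-archive`.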

This pull request removes some of the symbols in `symbols.o`. We can remove more symbols in a follow-up PR.
  • Loading branch information
bors committed Feb 24, 2025
2 parents 9af8985 + bb77554 commit 46536f1
Show file tree
Hide file tree
Showing 15 changed files with 282 additions and 49 deletions.
50 changes: 50 additions & 0 deletions compiler/rustc_codegen_ssa/src/back/link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use std::{env, fmt, fs, io, mem, str};

use cc::windows_registry;
use itertools::Itertools;
use object::read::archive::ArchiveFile;
use regex::Regex;
use rustc_arena::TypedArena;
use rustc_ast::CRATE_NODE_ID;
Expand Down Expand Up @@ -78,6 +79,7 @@ pub fn link_binary(
let _timer = sess.timer("link_binary");
let output_metadata = sess.opts.output_types.contains_key(&OutputType::Metadata);
let mut tempfiles_for_stdout_output: Vec<PathBuf> = Vec::new();
let mut tempfiles_for_linked_objects: Vec<PathBuf> = Vec::new();
for &crate_type in &codegen_results.crate_info.crate_types {
// Ignore executable crates if we have -Z no-codegen, as they will error.
if (sess.opts.unstable_opts.no_codegen || !sess.opts.output_types.should_codegen())
Expand Down Expand Up @@ -142,6 +144,8 @@ pub fn link_binary(
&out_filename,
&codegen_results,
path.as_ref(),
&mut tempfiles_for_linked_objects,
outputs,
);
}
}
Expand Down Expand Up @@ -214,6 +218,10 @@ pub fn link_binary(
ensure_removed(sess.dcx(), &temp);
}

for temp in tempfiles_for_linked_objects {
ensure_removed(sess.dcx(), &temp);
}

// If no requested outputs require linking, then the object temporaries should
// be kept.
if !sess.opts.output_types.should_link() {
Expand Down Expand Up @@ -765,6 +773,8 @@ fn link_natively(
out_filename: &Path,
codegen_results: &CodegenResults,
tmpdir: &Path,
tempfiles_for_linked_objects: &mut Vec<PathBuf>,
outputs: &OutputFilenames,
) {
info!("preparing {:?} to {:?}", crate_type, out_filename);
let (linker_path, flavor) = linker_and_flavor(sess);
Expand All @@ -789,6 +799,8 @@ fn link_natively(
temp_filename,
codegen_results,
self_contained_components,
tempfiles_for_linked_objects,
outputs,
);

linker::disable_localization(&mut cmd);
Expand Down Expand Up @@ -2248,6 +2260,8 @@ fn linker_with_args(
out_filename: &Path,
codegen_results: &CodegenResults,
self_contained_components: LinkSelfContainedComponents,
tempfiles_for_linked_objects: &mut Vec<PathBuf>,
outputs: &OutputFilenames,
) -> Command {
let self_contained_crt_objects = self_contained_components.is_crt_objects_enabled();
let cmd = &mut *super::linker::get_linker(
Expand Down Expand Up @@ -2323,6 +2337,13 @@ fn linker_with_args(
add_local_crate_regular_objects(cmd, codegen_results);
add_local_crate_metadata_objects(cmd, crate_type, codegen_results);
add_local_crate_allocator_objects(cmd, codegen_results);
add_local_crate_linked_objects(
cmd,
codegen_results,
crate_type,
tempfiles_for_linked_objects,
outputs,
);

// Avoid linking to dynamic libraries unless they satisfy some undefined symbols
// at the point at which they are specified on the command line.
Expand Down Expand Up @@ -2919,6 +2940,35 @@ fn rehome_lib_path(sess: &Session, path: &Path) -> PathBuf {
}
}

/// Extracts selected object files from upstream rlibs and passes them directly to the linker.
///
/// For every `(crate, object names)` entry in `crate_info.linked_objects` for `crate_type`,
/// the crate's rlib is memory-mapped and parsed as an archive. Each member whose name is in
/// the requested set is written to a temporary `<name>.linked_object` object path obtained
/// from `outputs`, added to `cmd` as an object file, and recorded in
/// `tempfiles_for_linked_objects`.
///
/// Archive members whose names are not valid UTF-8 are skipped: the requested names are
/// `String`s, so such a member can never be one of them.
///
/// # Panics
///
/// Panics if `crate_type` has no entry in `linked_objects`, if a listed crate has no rlib
/// source, or if the rlib cannot be opened, mapped, parsed, or read, or the temporary object
/// cannot be written. The panic message names the offending path.
fn add_local_crate_linked_objects(
    cmd: &mut dyn Linker,
    codegen_results: &CodegenResults,
    crate_type: CrateType,
    tempfiles_for_linked_objects: &mut Vec<PathBuf>,
    outputs: &OutputFilenames,
) {
    for (cnum, objects) in &codegen_results.crate_info.linked_objects[&crate_type] {
        let src = &codegen_results.crate_info.used_crate_source[cnum];
        let cratepath = &src
            .rlib
            .as_ref()
            .unwrap_or_else(|| panic!("crate {cnum:?} has linked objects but no rlib source"))
            .0;

        let rlib = File::open(cratepath)
            .unwrap_or_else(|err| panic!("failed to open rlib `{}`: {err}", cratepath.display()));
        // SAFETY: NOTE(review): this presumably relies on the rlib not being truncated or
        // rewritten while the mapping is alive — confirm against `Mmap::map`'s contract.
        let archive_map = unsafe { Mmap::map(rlib) }
            .unwrap_or_else(|err| panic!("failed to map rlib `{}`: {err}", cratepath.display()));
        let archive = ArchiveFile::parse(&*archive_map).unwrap_or_else(|err| {
            panic!("failed to parse rlib `{}` as an archive: {err}", cratepath.display())
        });

        for member in archive.members() {
            let member = member.unwrap_or_else(|err| {
                panic!("failed to read archive member of `{}`: {err}", cratepath.display())
            });
            // `objects` only contains `String`s, so a non-UTF-8 name cannot match; skip it
            // instead of aborting the link.
            let Ok(name) = std::str::from_utf8(member.name()) else {
                continue;
            };
            if !objects.contains(name) {
                continue;
            }

            let data = member.data(&*archive_map).unwrap_or_else(|err| {
                panic!("failed to read `{name}` from rlib `{}`: {err}", cratepath.display())
            });
            let obj =
                outputs.temp_path(OutputType::Object, Some(&format!("{name}.linked_object")));
            fs::write(&obj, data).unwrap_or_else(|err| {
                panic!("failed to write linked object `{}`: {err}", obj.display())
            });
            cmd.add_object(&obj);
            // Remembered so the caller can delete the extracted object after linking.
            tempfiles_for_linked_objects.push(obj);
        }
    }
}

// Adds the static "rlib" versions of all crates to the command line.
// There's a bit of magic which happens here specifically related to LTO,
// namely that we remove upstream object files.
Expand Down
105 changes: 81 additions & 24 deletions compiler/rustc_codegen_ssa/src/back/linker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@ use std::path::{Path, PathBuf};
use std::{env, io, iter, mem, str};

use cc::windows_registry;
use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
use rustc_hir::def_id::{CrateNum, LOCAL_CRATE};
use rustc_metadata::{
find_native_static_library, try_find_native_dynamic_library, try_find_native_static_library,
};
use rustc_middle::bug;
use rustc_middle::middle::dependency_format::Linkage;
use rustc_middle::middle::exported_symbols;
use rustc_middle::middle::exported_symbols::{ExportedSymbol, SymbolExportInfo, SymbolExportKind};
use rustc_middle::middle::exported_symbols::{
self, ExportedSymbol, SymbolExportInfo, SymbolExportKind,
};
use rustc_middle::ty::TyCtxt;
use rustc_session::Session;
use rustc_session::config::{self, CrateType, DebugInfo, LinkerPluginLto, Lto, OptLevel, Strip};
Expand All @@ -21,6 +23,7 @@ use rustc_target::spec::{Cc, LinkOutputKind, LinkerFlavor, Lld};
use tracing::{debug, warn};

use super::command::Command;
use super::link::are_upstream_rust_objects_already_included;
use super::symbol_export;
use crate::errors;

Expand Down Expand Up @@ -1753,17 +1756,15 @@ impl<'a> Linker for AixLinker<'a> {
fn for_each_exported_symbols_include_dep<'tcx>(
tcx: TyCtxt<'tcx>,
crate_type: CrateType,
mut callback: impl FnMut(ExportedSymbol<'tcx>, SymbolExportInfo, CrateNum),
mut callback: impl FnMut(&'tcx [(ExportedSymbol<'tcx>, SymbolExportInfo)], CrateNum),
) {
let formats = tcx.dependency_formats(());
let deps = &formats[&crate_type];

for (cnum, dep_format) in deps.iter_enumerated() {
// For each dependency that we are linking to statically ...
if *dep_format == Linkage::Static {
for &(symbol, info) in tcx.exported_symbols(cnum).iter() {
callback(symbol, info, cnum);
}
callback(tcx.exported_symbols(cnum), cnum);
}
}
}
Expand All @@ -1783,12 +1784,14 @@ pub(crate) fn exported_symbols(tcx: TyCtxt<'_>, crate_type: CrateType) -> Vec<St
fn exported_symbols_for_non_proc_macro(tcx: TyCtxt<'_>, crate_type: CrateType) -> Vec<String> {
let mut symbols = Vec::new();
let export_threshold = symbol_export::crates_export_threshold(&[crate_type]);
for_each_exported_symbols_include_dep(tcx, crate_type, |symbol, info, cnum| {
if info.level.is_below_threshold(export_threshold) {
symbols.push(symbol_export::exporting_symbol_name_for_instance_in_crate(
tcx, symbol, cnum,
));
symbol_export::extend_exported_symbols(&mut symbols, tcx, symbol, cnum);
for_each_exported_symbols_include_dep(tcx, crate_type, |exported_symbols, cnum| {
for &(symbol, info) in exported_symbols {
if info.level.is_below_threshold(export_threshold) {
symbols.push(symbol_export::exporting_symbol_name_for_instance_in_crate(
tcx, symbol, cnum,
));
symbol_export::extend_exported_symbols(&mut symbols, tcx, symbol, cnum);
}
}
});

Expand All @@ -1808,30 +1811,84 @@ fn exported_symbols_for_proc_macro_crate(tcx: TyCtxt<'_>) -> Vec<String> {
vec![proc_macro_decls_name, metadata_symbol_name]
}

pub(crate) fn linked_symbols(
pub(crate) fn linked_objects(
tcx: TyCtxt<'_>,
crate_type: CrateType,
) -> Vec<(String, SymbolExportKind)> {
linked_symbols: &mut Vec<(String, SymbolExportKind)>,
) -> FxIndexMap<CrateNum, FxHashSet<String>> {
match crate_type {
CrateType::Executable | CrateType::Cdylib | CrateType::Dylib => (),
CrateType::Staticlib | CrateType::ProcMacro | CrateType::Rlib => {
return Vec::new();
return FxIndexMap::default();
}
}

let mut symbols = Vec::new();

let mut objects = FxIndexMap::default();
let upstream_rust_objects_already_included =
are_upstream_rust_objects_already_included(tcx.sess);
let export_threshold = symbol_export::crates_export_threshold(&[crate_type]);
for_each_exported_symbols_include_dep(tcx, crate_type, |symbol, info, cnum| {
if info.level.is_below_threshold(export_threshold) || info.used {
symbols.push((
symbol_export::linking_symbol_name_for_instance_in_crate(tcx, symbol, cnum),
info.kind,
));
for_each_exported_symbols_include_dep(tcx, crate_type, |exported_symbols, cnum| {
let exported_symbols = exported_symbols.iter().filter(|(_, info)| {
(!matches!(crate_type, CrateType::Executable)
&& info.level.is_below_threshold(export_threshold))
|| info.used
});
if cnum == LOCAL_CRATE {
// Since the local crate is always linked directly to object files, `#[used]` works as expected;
// we only need to add undefined symbols.
linked_symbols.extend(
exported_symbols
.filter(|(symbol, _)| match symbol {
ExportedSymbol::NonGeneric { cgu, .. } => cgu.is_none(),
ExportedSymbol::Generic(..)
| ExportedSymbol::DropGlue(..)
| ExportedSymbol::AsyncDropGlueCtorShim(..) => false,
ExportedSymbol::ThreadLocalShim(_def_id) => false,
ExportedSymbol::NoDefId(..) => true,
})
.map(|&(symbol, info)| {
(
symbol_export::linking_symbol_name_for_instance_in_crate(
tcx, symbol, cnum,
),
info.kind,
)
}),
);
return;
}
if matches!(crate_type, CrateType::Executable) && tcx.is_compiler_builtins(cnum) {
return;
}
let lto = upstream_rust_objects_already_included;
let mut cgus = FxHashSet::default();
for &(symbol, info) in exported_symbols {
match symbol {
ExportedSymbol::NonGeneric { cgu: Some(cgu), .. } => {
if !lto {
cgus.insert(cgu.as_str().to_string());
}
}
ExportedSymbol::NonGeneric { cgu: None, .. } | ExportedSymbol::NoDefId(..) => {
// Unresolved symbols may come from external libraries.
linked_symbols.push((
symbol_export::linking_symbol_name_for_instance_in_crate(tcx, symbol, cnum),
info.kind,
));
}
ExportedSymbol::Generic(..)
| ExportedSymbol::DropGlue(..)
| ExportedSymbol::AsyncDropGlueCtorShim(..)
| ExportedSymbol::ThreadLocalShim(..) => {}
};
}
if cgus.is_empty() {
return;
}
objects.insert(cnum, cgus);
});

symbols
objects
}

/// Much simplified and explicit CLI for the NVPTX linker. The linker operates
Expand Down
60 changes: 52 additions & 8 deletions compiler/rustc_codegen_ssa/src/back/symbol_export.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
use rustc_middle::middle::exported_symbols::{
ExportedSymbol, SymbolExportInfo, SymbolExportKind, SymbolExportLevel, metadata_symbol_name,
};
use rustc_middle::mir::mono::{CodegenUnit, MonoItem};
use rustc_middle::query::LocalCrate;
use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, Ty, TyCtxt};
use rustc_middle::util::Providers;
use rustc_session::config::{CrateType, OomStrategy};
use rustc_session::config::{CrateType, OomStrategy, OutputFilenames, OutputType};
use rustc_span::Symbol;
use rustc_target::callconv::Conv;
use rustc_target::spec::{SanitizerSet, TlsModel};
use tracing::debug;
Expand Down Expand Up @@ -168,6 +170,36 @@ fn is_reachable_non_generic_provider_extern(tcx: TyCtxt<'_>, def_id: DefId) -> b
tcx.reachable_non_generics(def_id.krate).contains_key(&def_id)
}

/// Returns the object file name of the codegen unit that contains `def_id`.
///
/// Yields `None` when `def_id` is not a codegened item, or when none of `codegen_units`
/// contains it. Otherwise the result is the interned file name (final path component) of the
/// temporary object path that `outputs` assigns to the first matching codegen unit.
///
/// # Panics
///
/// Panics if that temporary object path has no file name or the name is not valid UTF-8.
fn find_codegen_unit<'tcx>(
    tcx: TyCtxt<'tcx>,
    codegen_units: &'tcx [CodegenUnit<'tcx>],
    outputs: &'tcx OutputFilenames,
    def_id: DefId,
) -> Option<Symbol> {
    if !tcx.is_codegened_item(def_id) {
        return None;
    }

    // Statics are looked up as static items; everything else as a monomorphic function.
    let mono_item = match tcx.is_static(def_id) {
        true => MonoItem::Static(def_id),
        false => MonoItem::Fn(Instance::mono(tcx, def_id)),
    };

    let owning_cgu = codegen_units.iter().find(|unit| unit.contains_item(&mono_item))?;
    let object_path = outputs.temp_path(OutputType::Object, Some(owning_cgu.name().as_str()));
    let object_name = object_path.file_name().unwrap().to_str().unwrap();
    Some(Symbol::intern(object_name))
}

fn exported_symbols_provider_local(
tcx: TyCtxt<'_>,
_: LocalCrate,
Expand All @@ -182,8 +214,20 @@ fn exported_symbols_provider_local(
tcx.reachable_non_generics(LOCAL_CRATE).to_sorted(&hcx, true)
});

let mut symbols: Vec<_> =
sorted.iter().map(|&(&def_id, &info)| (ExportedSymbol::NonGeneric(def_id), info)).collect();
let outputs = tcx.output_filenames(());
let codegen_units = tcx.collect_and_partition_mono_items(()).codegen_units;
let mut symbols: Vec<_> = sorted
.iter()
.map(|&(&def_id, &info)| {
(
ExportedSymbol::NonGeneric {
def_id,
cgu: find_codegen_unit(tcx, codegen_units, outputs, def_id),
},
info,
)
})
.collect();

// Export TLS shims
if !tcx.sess.target.dll_tls_export {
Expand Down Expand Up @@ -433,7 +477,7 @@ fn upstream_monomorphizations_provider(
continue;
}
}
ExportedSymbol::NonGeneric(..)
ExportedSymbol::NonGeneric { .. }
| ExportedSymbol::ThreadLocalShim(..)
| ExportedSymbol::NoDefId(..) => {
// These are not monomorphizations
Expand Down Expand Up @@ -545,7 +589,7 @@ pub(crate) fn symbol_name_for_instance_in_crate<'tcx>(
// This is something instantiated in an upstream crate, so we have to use
// the slower (because uncached) version of computing the symbol name.
match symbol {
ExportedSymbol::NonGeneric(def_id) => {
ExportedSymbol::NonGeneric { def_id, .. } => {
rustc_symbol_mangling::symbol_name_for_instance_in_crate(
tcx,
Instance::mono(tcx, def_id),
Expand Down Expand Up @@ -590,12 +634,12 @@ fn calling_convention_for_symbol<'tcx>(
symbol: ExportedSymbol<'tcx>,
) -> (Conv, &'tcx [rustc_target::callconv::ArgAbi<'tcx, Ty<'tcx>>]) {
let instance = match symbol {
ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _)
ExportedSymbol::NonGeneric { def_id, .. } | ExportedSymbol::Generic(def_id, _)
if tcx.is_static(def_id) =>
{
None
}
ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)),
ExportedSymbol::NonGeneric { def_id, .. } => Some(Instance::mono(tcx, def_id)),
ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)),
// DropGlue always use the Rust calling convention and thus follow the target's default
// symbol decoration scheme.
Expand Down Expand Up @@ -711,7 +755,7 @@ fn maybe_emutls_symbol_name<'tcx>(
undecorated: &str,
) -> Option<String> {
if matches!(tcx.sess.tls_model(), TlsModel::Emulated)
&& let ExportedSymbol::NonGeneric(def_id) = symbol
&& let ExportedSymbol::NonGeneric { def_id, .. } = symbol
&& tcx.is_thread_local_static(def_id)
{
// When using emutls, LLVM will add the `__emutls_v.` prefix to thread local symbols,
Expand Down
Loading

0 comments on commit 46536f1

Please sign in to comment.