From 9482bda375e111e91d56f79ceec56ad53b803ada Mon Sep 17 00:00:00 2001 From: Mads Marquart Date: Wed, 4 Dec 2024 01:55:46 +0100 Subject: [PATCH 1/4] Add a test case for `#[used]` with archive --- .../include-all-symbols-linking/lib.rs | 3 +- .../include-all-symbols-linking/rmake.rs | 15 +++++--- .../auxiliary/used_pre_main_constructor.rs | 36 +++++++++++++++++++ tests/ui/attributes/used_with_archive.rs | 20 +++++++++++ .../attributes/used_with_archive.run.stdout | 2 ++ 5 files changed, 70 insertions(+), 6 deletions(-) create mode 100644 tests/ui/attributes/auxiliary/used_pre_main_constructor.rs create mode 100644 tests/ui/attributes/used_with_archive.rs create mode 100644 tests/ui/attributes/used_with_archive.run.stdout diff --git a/tests/run-make/include-all-symbols-linking/lib.rs b/tests/run-make/include-all-symbols-linking/lib.rs index 99508bcdaf314..73186ee99e3d9 100644 --- a/tests/run-make/include-all-symbols-linking/lib.rs +++ b/tests/run-make/include-all-symbols-linking/lib.rs @@ -1,5 +1,6 @@ mod foo { - #[link_section = ".rodata.STATIC"] + #[cfg_attr(target_os = "linux", link_section = ".rodata.STATIC")] + #[cfg_attr(target_vendor = "apple", link_section = "__DATA,STATIC")] #[used] static STATIC: [u32; 10] = [1; 10]; } diff --git a/tests/run-make/include-all-symbols-linking/rmake.rs b/tests/run-make/include-all-symbols-linking/rmake.rs index 77fd71ab20d21..bab510fb5be3c 100644 --- a/tests/run-make/include-all-symbols-linking/rmake.rs +++ b/tests/run-make/include-all-symbols-linking/rmake.rs @@ -7,15 +7,20 @@ // See https://github.com/rust-lang/rust/pull/95604 // See https://github.com/rust-lang/rust/issues/47384 -//@ only-linux -// Reason: differences in object file formats on OSX and Windows -// causes errors in the llvm_objdump step +//@ ignore-wasm differences in object file formats causes errors in the llvm_objdump step. +//@ ignore-windows differences in object file formats causes errors in the llvm_objdump step. 
-use run_make_support::{dynamic_lib_name, llvm_objdump, llvm_readobj, rustc}; +use run_make_support::{dynamic_lib_name, llvm_objdump, llvm_readobj, rustc, target}; fn main() { rustc().crate_type("lib").input("lib.rs").run(); - rustc().crate_type("cdylib").link_args("-Tlinker.ld").input("main.rs").run(); + let mut main = rustc(); + main.crate_type("cdylib"); + if target().contains("linux") { + main.link_args("-Tlinker.ld"); + } + main.input("main.rs").run(); + // Ensure `#[used]` and `KEEP`-ed section is there llvm_objdump() .arg("--full-contents") diff --git a/tests/ui/attributes/auxiliary/used_pre_main_constructor.rs b/tests/ui/attributes/auxiliary/used_pre_main_constructor.rs new file mode 100644 index 0000000000000..6f9d7cc59e950 --- /dev/null +++ b/tests/ui/attributes/auxiliary/used_pre_main_constructor.rs @@ -0,0 +1,36 @@ +//! Add a constructor that runs pre-main, similar to what the `ctor` crate does. +//! +//! #[ctor] +//! fn constructor() { +//! printf(c"constructor\n"); +//! } + +//@ edition:2021 +//@ no-prefer-dynamic explicitly test with crates that are built as an archive +#![crate_type = "rlib"] + +#[cfg_attr( + any( + target_os = "linux", + target_os = "android", + target_os = "freebsd", + target_os = "netbsd", + target_os = "openbsd", + target_os = "dragonfly", + target_os = "illumos", + target_os = "haiku" + ), + link_section = ".init_array" +)] +#[cfg_attr(target_vendor = "apple", link_section = "__DATA,__mod_init_func,mod_init_funcs")] +#[cfg_attr(target_os = "windows", link_section = ".CRT$XCU")] +#[used] +static CONSTRUCTOR: extern "C" fn() = constructor; + +#[cfg_attr(any(target_os = "linux", target_os = "android"), link_section = ".text.startup")] +extern "C" fn constructor() { + extern "C" { + fn printf(format: *const std::ffi::c_char, ...) 
-> std::ffi::c_int; + } + unsafe { printf(c"constructor\n".as_ptr()) }; +} diff --git a/tests/ui/attributes/used_with_archive.rs b/tests/ui/attributes/used_with_archive.rs new file mode 100644 index 0000000000000..0a670a855d319 --- /dev/null +++ b/tests/ui/attributes/used_with_archive.rs @@ -0,0 +1,20 @@ +//! Ensure that `#[used]` in archives are correctly registered. +//! +//! Regression test for https://github.com/rust-lang/rust/issues/133491. + +//@ edition:2021 +//@ run-pass +//@ check-run-results +//@ aux-build: used_pre_main_constructor.rs + +//@ ignore-wasm ctor doesn't work on WASM + +// Make sure `rustc` links the archive, but intentionally do not import/use any items. +extern crate used_pre_main_constructor as _; + +fn main() { + extern "C" { + fn printf(format: *const std::ffi::c_char, ...) -> std::ffi::c_int; + } + unsafe { printf(c"main\n".as_ptr()) }; +} diff --git a/tests/ui/attributes/used_with_archive.run.stdout b/tests/ui/attributes/used_with_archive.run.stdout new file mode 100644 index 0000000000000..212372b3e5795 --- /dev/null +++ b/tests/ui/attributes/used_with_archive.run.stdout @@ -0,0 +1,2 @@ +constructor +main From 5f77dba062d76f900cb8e96806e6cd8097b798ba Mon Sep 17 00:00:00 2001 From: DianQK Date: Fri, 21 Feb 2025 22:24:07 +0800 Subject: [PATCH 2/4] Link object files that use `#[used]` --- compiler/rustc_codegen_ssa/src/back/link.rs | 47 +++++++ compiler/rustc_codegen_ssa/src/back/linker.rs | 120 ++++++++++++++---- compiler/rustc_codegen_ssa/src/base.rs | 20 ++- compiler/rustc_codegen_ssa/src/lib.rs | 3 +- tests/run-make/linker-warning/short-error.txt | 2 +- 5 files changed, 163 insertions(+), 29 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs index a8d917f0fdb58..f4293f45e9059 100644 --- a/compiler/rustc_codegen_ssa/src/back/link.rs +++ b/compiler/rustc_codegen_ssa/src/back/link.rs @@ -9,6 +9,7 @@ use std::{env, fmt, fs, io, mem, str}; use cc::windows_registry; use 
itertools::Itertools; +use object::read::archive::{ArchiveFile, ArchiveOffset}; use regex::Regex; use rustc_arena::TypedArena; use rustc_ast::CRATE_NODE_ID; @@ -78,6 +79,7 @@ pub fn link_binary( let _timer = sess.timer("link_binary"); let output_metadata = sess.opts.output_types.contains_key(&OutputType::Metadata); let mut tempfiles_for_stdout_output: Vec = Vec::new(); + let mut tempfiles_for_linked_objects: Vec = Vec::new(); for &crate_type in &codegen_results.crate_info.crate_types { // Ignore executable crates if we have -Z no-codegen, as they will error. if (sess.opts.unstable_opts.no_codegen || !sess.opts.output_types.should_codegen()) @@ -142,6 +144,8 @@ pub fn link_binary( &out_filename, &codegen_results, path.as_ref(), + &mut tempfiles_for_linked_objects, + outputs, ); } } @@ -214,6 +218,10 @@ pub fn link_binary( ensure_removed(sess.dcx(), &temp); } + for temp in tempfiles_for_linked_objects { + ensure_removed(sess.dcx(), &temp); + } + // If no requested outputs require linking, then the object temporaries should // be kept. 
if !sess.opts.output_types.should_link() { @@ -771,6 +779,8 @@ fn link_natively( out_filename: &Path, codegen_results: &CodegenResults, tmpdir: &Path, + tempfiles_for_linked_objects: &mut Vec, + outputs: &OutputFilenames, ) { info!("preparing {:?} to {:?}", crate_type, out_filename); let (linker_path, flavor) = linker_and_flavor(sess); @@ -795,6 +805,8 @@ fn link_natively( temp_filename, codegen_results, self_contained_components, + tempfiles_for_linked_objects, + outputs, ); linker::disable_localization(&mut cmd); @@ -2254,6 +2266,8 @@ fn linker_with_args( out_filename: &Path, codegen_results: &CodegenResults, self_contained_components: LinkSelfContainedComponents, + tempfiles_for_linked_objects: &mut Vec, + outputs: &OutputFilenames, ) -> Command { let self_contained_crt_objects = self_contained_components.is_crt_objects_enabled(); let cmd = &mut *super::linker::get_linker( @@ -2329,6 +2343,13 @@ fn linker_with_args( add_local_crate_regular_objects(cmd, codegen_results); add_local_crate_metadata_objects(cmd, crate_type, codegen_results); add_local_crate_allocator_objects(cmd, codegen_results); + add_local_crate_linked_objects( + cmd, + codegen_results, + crate_type, + tempfiles_for_linked_objects, + outputs, + ); // Avoid linking to dynamic libraries unless they satisfy some undefined symbols // at the point at which they are specified on the command line. 
@@ -2925,6 +2946,32 @@ fn rehome_lib_path(sess: &Session, path: &Path) -> PathBuf { } } +fn add_local_crate_linked_objects( + cmd: &mut dyn Linker, + codegen_results: &CodegenResults, + crate_type: CrateType, + tempfiles_for_linked_objects: &mut Vec, + outputs: &OutputFilenames, +) { + for (cnum, offsets) in &codegen_results.crate_info.linked_objects[&crate_type] { + let src = &codegen_results.crate_info.used_crate_source[cnum]; + let cratepath = &src.rlib.as_ref().unwrap().0; + let archive_map = unsafe { Mmap::map(File::open(cratepath).unwrap()).unwrap() }; + let archive = ArchiveFile::parse(&*archive_map) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)) + .unwrap(); + for &offset in offsets { + let member = archive.member(ArchiveOffset(offset)).unwrap(); + let name = std::str::from_utf8(member.name()).unwrap(); + let data = member.data(&*archive_map).unwrap(); + let obj = outputs.temp_path(OutputType::Object, Some(&format!("{name}.linked_object"))); + fs::write(&obj, data).unwrap(); + cmd.add_object(&obj); + tempfiles_for_linked_objects.push(obj); + } + } +} + // Adds the static "rlib" versions of all crates to the command line. // There's a bit of magic which happens here specifically related to LTO, // namely that we remove upstream object files. 
diff --git a/compiler/rustc_codegen_ssa/src/back/linker.rs b/compiler/rustc_codegen_ssa/src/back/linker.rs index 8900405c1b8f8..4e0dff0a38971 100644 --- a/compiler/rustc_codegen_ssa/src/back/linker.rs +++ b/compiler/rustc_codegen_ssa/src/back/linker.rs @@ -5,14 +5,18 @@ use std::path::{Path, PathBuf}; use std::{env, io, iter, mem, str}; use cc::windows_registry; +use object::read::archive::ArchiveFile; +use rustc_data_structures::fx::{FxHashSet, FxIndexMap, FxIndexSet}; +use rustc_data_structures::memmap::Mmap; use rustc_hir::def_id::{CrateNum, LOCAL_CRATE}; use rustc_metadata::{ find_native_static_library, try_find_native_dynamic_library, try_find_native_static_library, }; use rustc_middle::bug; use rustc_middle::middle::dependency_format::Linkage; -use rustc_middle::middle::exported_symbols; -use rustc_middle::middle::exported_symbols::{ExportedSymbol, SymbolExportInfo, SymbolExportKind}; +use rustc_middle::middle::exported_symbols::{ + self, ExportedSymbol, SymbolExportInfo, SymbolExportKind, +}; use rustc_middle::ty::TyCtxt; use rustc_session::Session; use rustc_session::config::{self, CrateType, DebugInfo, LinkerPluginLto, Lto, OptLevel, Strip}; @@ -21,6 +25,7 @@ use rustc_target::spec::{Cc, LinkOutputKind, LinkerFlavor, Lld}; use tracing::{debug, warn}; use super::command::Command; +use super::link::are_upstream_rust_objects_already_included; use super::symbol_export; use crate::errors; @@ -1753,7 +1758,7 @@ impl<'a> Linker for AixLinker<'a> { fn for_each_exported_symbols_include_dep<'tcx>( tcx: TyCtxt<'tcx>, crate_type: CrateType, - mut callback: impl FnMut(ExportedSymbol<'tcx>, SymbolExportInfo, CrateNum), + mut callback: impl FnMut(&'tcx [(ExportedSymbol<'tcx>, SymbolExportInfo)], CrateNum), ) { let formats = tcx.dependency_formats(()); let deps = &formats[&crate_type]; @@ -1761,9 +1766,7 @@ fn for_each_exported_symbols_include_dep<'tcx>( for (cnum, dep_format) in deps.iter_enumerated() { // For each dependency that we are linking to statically ... 
if *dep_format == Linkage::Static { - for &(symbol, info) in tcx.exported_symbols(cnum).iter() { - callback(symbol, info, cnum); - } + callback(tcx.exported_symbols(cnum), cnum); } } } @@ -1783,12 +1786,14 @@ pub(crate) fn exported_symbols(tcx: TyCtxt<'_>, crate_type: CrateType) -> Vec, crate_type: CrateType) -> Vec { let mut symbols = Vec::new(); let export_threshold = symbol_export::crates_export_threshold(&[crate_type]); - for_each_exported_symbols_include_dep(tcx, crate_type, |symbol, info, cnum| { - if info.level.is_below_threshold(export_threshold) { - symbols.push(symbol_export::exporting_symbol_name_for_instance_in_crate( - tcx, symbol, cnum, - )); - symbol_export::extend_exported_symbols(&mut symbols, tcx, symbol, cnum); + for_each_exported_symbols_include_dep(tcx, crate_type, |exported_symbols, cnum| { + for &(symbol, info) in exported_symbols { + if info.level.is_below_threshold(export_threshold) { + symbols.push(symbol_export::exporting_symbol_name_for_instance_in_crate( + tcx, symbol, cnum, + )); + symbol_export::extend_exported_symbols(&mut symbols, tcx, symbol, cnum); + } } }); @@ -1808,30 +1813,97 @@ fn exported_symbols_for_proc_macro_crate(tcx: TyCtxt<'_>) -> Vec { vec![proc_macro_decls_name, metadata_symbol_name] } -pub(crate) fn linked_symbols( +fn add_linked_objects( + archive_path: &Path, + linked_symbols: &mut FxHashSet, +) -> Option> { + let archive_map = unsafe { Mmap::map(File::open(&archive_path).unwrap()).unwrap() }; + let archive = ArchiveFile::parse(&*archive_map) + .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)) + .unwrap(); + let Some(archive_symbols) = archive.symbols().unwrap() else { + return None; + }; + let mut offsets = FxIndexSet::default(); + for symbol in archive_symbols { + let symbol = symbol.unwrap(); + let name = std::str::from_utf8(symbol.name()).unwrap(); + if linked_symbols.remove(name) { + offsets.insert(symbol.offset().0); + } + } + Some(offsets) +} + +pub(crate) fn linked_objects( tcx: TyCtxt<'_>, 
crate_type: CrateType, -) -> Vec<(String, SymbolExportKind)> { + linked_symbols: &mut Vec<(String, SymbolExportKind)>, +) -> FxIndexMap> { match crate_type { CrateType::Executable | CrateType::Cdylib | CrateType::Dylib => (), CrateType::Staticlib | CrateType::ProcMacro | CrateType::Rlib => { - return Vec::new(); + return FxIndexMap::default(); } } - let mut symbols = Vec::new(); - + let mut objects = FxIndexMap::default(); + let upstream_rust_objects_already_included = + are_upstream_rust_objects_already_included(tcx.sess); let export_threshold = symbol_export::crates_export_threshold(&[crate_type]); - for_each_exported_symbols_include_dep(tcx, crate_type, |symbol, info, cnum| { - if info.level.is_below_threshold(export_threshold) || info.used { - symbols.push(( - symbol_export::linking_symbol_name_for_instance_in_crate(tcx, symbol, cnum), - info.kind, - )); + for_each_exported_symbols_include_dep(tcx, crate_type, |exported_symbols, cnum| { + if cnum == LOCAL_CRATE { + // We don't know here if the symbols are undefined, so we add them all. + // Since the local crate is always linked directly to object files, `#[used]` works as expected. 
+ linked_symbols.extend( + exported_symbols + .iter() + .filter(|(_, info)| { + info.level.is_below_threshold(export_threshold) || info.used + }) + .map(|&(symbol, info)| { + ( + symbol_export::linking_symbol_name_for_instance_in_crate( + tcx, symbol, cnum, + ), + info.kind, + ) + }), + ); + return; + } + // TODO: let lto = upstream_rust_objects_already_included && !ignored_for_lto(tcx.sess, &codegen_results.crate_info, cnum); + let lto = upstream_rust_objects_already_included; + if lto { + return; + } + let symbols: Vec<_> = exported_symbols + .iter() + .filter(|(_, info)| info.level.is_below_threshold(export_threshold) || info.used) + .map(|&(symbol, info)| { + ( + symbol_export::linking_symbol_name_for_instance_in_crate(tcx, symbol, cnum), + info.kind, + ) + }) + .collect(); + if symbols.is_empty() { + return; } + let used_crate_source = tcx.used_crate_source(cnum); + let cratepath = &used_crate_source.rlib.as_ref().unwrap().0; + let mut crate_linked_symbols: FxHashSet<_> = + symbols.iter().map(|(symbol, _)| symbol.to_string()).collect(); + if let Some(archive_offsets) = add_linked_objects(cratepath, &mut crate_linked_symbols) { + objects.insert(cnum, archive_offsets); + } + // Unresolved symbols may come from external libraries. + linked_symbols.extend( + symbols.into_iter().filter(|(symbol, _)| crate_linked_symbols.contains(symbol)), + ); }); - symbols + objects } /// Much simplified and explicit CLI for the NVPTX linker. 
The linker operates diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs index 40238f4b4915a..2c35cbf828a7d 100644 --- a/compiler/rustc_codegen_ssa/src/base.rs +++ b/compiler/rustc_codegen_ssa/src/base.rs @@ -6,7 +6,7 @@ use std::time::{Duration, Instant}; use itertools::Itertools; use rustc_abi::FIRST_VARIANT; use rustc_ast::expand::allocator::{ALLOCATOR_METHODS, AllocatorKind, global_fn_name}; -use rustc_data_structures::fx::{FxHashMap, FxIndexSet}; +use rustc_data_structures::fx::{FxHashMap, FxIndexMap, FxIndexSet}; use rustc_data_structures::profiling::{get_resident_set_size, print_time_passes_entry}; use rustc_data_structures::sync::par_map; use rustc_data_structures::unord::UnordMap; @@ -872,8 +872,21 @@ impl CrateInfo { .iter() .map(|&c| (c, crate::back::linker::exported_symbols(tcx, c))) .collect(); - let linked_symbols = - crate_types.iter().map(|&c| (c, crate::back::linker::linked_symbols(tcx, c))).collect(); + let mut linked_symbols: FxIndexMap> = + crate_types.iter().map(|&c| (c, Vec::new())).collect(); + let linked_objects = crate_types + .iter() + .map(|&c| { + ( + c, + crate::back::linker::linked_objects( + tcx, + c, + linked_symbols.get_mut(&c).unwrap(), + ), + ) + }) + .collect(); let local_crate_name = tcx.crate_name(LOCAL_CRATE); let crate_attrs = tcx.hir().attrs(rustc_hir::CRATE_HIR_ID); let subsystem = @@ -919,6 +932,7 @@ impl CrateInfo { crate_types, exported_symbols, linked_symbols, + linked_objects, local_crate_name, compiler_builtins, profiler_runtime: None, diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs index 9d2ac219d592c..356d4628af8c1 100644 --- a/compiler/rustc_codegen_ssa/src/lib.rs +++ b/compiler/rustc_codegen_ssa/src/lib.rs @@ -27,7 +27,7 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; use rustc_ast as ast; -use rustc_data_structures::fx::{FxHashSet, FxIndexMap}; +use rustc_data_structures::fx::{FxHashSet, FxIndexMap, FxIndexSet}; use 
rustc_data_structures::unord::UnordMap; use rustc_hir::CRATE_HIR_ID; use rustc_hir::def_id::CrateNum; @@ -194,6 +194,7 @@ pub struct CrateInfo { pub crate_types: Vec, pub exported_symbols: UnordMap>, pub linked_symbols: FxIndexMap>, + pub linked_objects: FxIndexMap>>, pub local_crate_name: Symbol, pub compiler_builtins: Option, pub profiler_runtime: Option, diff --git a/tests/run-make/linker-warning/short-error.txt b/tests/run-make/linker-warning/short-error.txt index dd3b742bbfd56..238e25574fa48 100644 --- a/tests/run-make/linker-warning/short-error.txt +++ b/tests/run-make/linker-warning/short-error.txt @@ -1,6 +1,6 @@ error: linking with `./fake-linker` failed: exit status: 1 | - = note: "./fake-linker" "-m64" "/tmp/rustc/symbols.o" "<2 object files omitted>" "-Wl,--as-needed" "-Wl,-Bstatic" "/lib/rustlib/x86_64-unknown-linux-gnu/lib/{libstd-*,libpanic_unwind-*,libobject-*,libmemchr-*,libaddr2line-*,libgimli-*,librustc_demangle-*,libstd_detect-*,libhashbrown-*,librustc_std_workspace_alloc-*,libminiz_oxide-*,libadler2-*,libunwind-*,libcfg_if-*,liblibc-*,liballoc-*,librustc_std_workspace_core-*,libcore-*,libcompiler_builtins-*}.rlib" "-Wl,-Bdynamic" "-lgcc_s" "-lutil" "-lrt" "-lpthread" "-lm" "-ldl" "-lc" "-Wl,--eh-frame-hdr" "-Wl,-z,noexecstack" "-L" "/build-root/test/run-make/linker-warning/rmake_out" "-L" "/lib/rustlib/x86_64-unknown-linux-gnu/lib" "-o" "main" "-Wl,--gc-sections" "-pie" "-Wl,-z,relro,-z,now" "-nodefaultlibs" "run_make_error" + = note: "./fake-linker" "-m64" "/tmp/rustc/symbols.o" "<5 object files omitted>" "-Wl,--as-needed" "-Wl,-Bstatic" "/lib/rustlib/x86_64-unknown-linux-gnu/lib/{libstd-*,libpanic_unwind-*,libobject-*,libmemchr-*,libaddr2line-*,libgimli-*,librustc_demangle-*,libstd_detect-*,libhashbrown-*,librustc_std_workspace_alloc-*,libminiz_oxide-*,libadler2-*,libunwind-*,libcfg_if-*,liblibc-*,liballoc-*,librustc_std_workspace_core-*,libcore-*,libcompiler_builtins-*}.rlib" "-Wl,-Bdynamic" "-lgcc_s" "-lutil" "-lrt" "-lpthread" "-lm" "-ldl" 
"-lc" "-Wl,--eh-frame-hdr" "-Wl,-z,noexecstack" "-L" "/build-root/test/run-make/linker-warning/rmake_out" "-L" "/lib/rustlib/x86_64-unknown-linux-gnu/lib" "-o" "main" "-Wl,--gc-sections" "-pie" "-Wl,-z,relro,-z,now" "-nodefaultlibs" "run_make_error" = note: some arguments are omitted. use `--verbose` to show all linker arguments = note: error: baz From 49a4c6766f950db0a518bfe964843d772ec4056f Mon Sep 17 00:00:00 2001 From: DianQK Date: Sun, 23 Feb 2025 16:39:21 +0800 Subject: [PATCH 3/4] Ensuring proper codegen unit association for exported symbols. --- compiler/rustc_codegen_ssa/src/back/link.rs | 21 +++-- compiler/rustc_codegen_ssa/src/back/linker.rs | 93 ++++++++----------- .../src/back/symbol_export.rs | 60 ++++++++++-- compiler/rustc_codegen_ssa/src/lib.rs | 4 +- .../src/rmeta/decoder/cstore_impl.rs | 2 +- .../src/middle/exported_symbols.rs | 7 +- src/tools/miri/src/bin/miri.rs | 6 +- src/tools/miri/src/helpers.rs | 2 +- 8 files changed, 113 insertions(+), 82 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs index f4293f45e9059..80499f9ac6360 100644 --- a/compiler/rustc_codegen_ssa/src/back/link.rs +++ b/compiler/rustc_codegen_ssa/src/back/link.rs @@ -9,7 +9,7 @@ use std::{env, fmt, fs, io, mem, str}; use cc::windows_registry; use itertools::Itertools; -use object::read::archive::{ArchiveFile, ArchiveOffset}; +use object::read::archive::ArchiveFile; use regex::Regex; use rustc_arena::TypedArena; use rustc_ast::CRATE_NODE_ID; @@ -2953,21 +2953,24 @@ fn add_local_crate_linked_objects( tempfiles_for_linked_objects: &mut Vec, outputs: &OutputFilenames, ) { - for (cnum, offsets) in &codegen_results.crate_info.linked_objects[&crate_type] { + for (cnum, objects) in &codegen_results.crate_info.linked_objects[&crate_type] { let src = &codegen_results.crate_info.used_crate_source[cnum]; let cratepath = &src.rlib.as_ref().unwrap().0; let archive_map = unsafe { 
Mmap::map(File::open(cratepath).unwrap()).unwrap() }; let archive = ArchiveFile::parse(&*archive_map) .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)) .unwrap(); - for &offset in offsets { - let member = archive.member(ArchiveOffset(offset)).unwrap(); + for member in archive.members() { + let member = member.unwrap(); let name = std::str::from_utf8(member.name()).unwrap(); - let data = member.data(&*archive_map).unwrap(); - let obj = outputs.temp_path(OutputType::Object, Some(&format!("{name}.linked_object"))); - fs::write(&obj, data).unwrap(); - cmd.add_object(&obj); - tempfiles_for_linked_objects.push(obj); + if objects.contains(name) { + let data = member.data(&*archive_map).unwrap(); + let obj = + outputs.temp_path(OutputType::Object, Some(&format!("{name}.linked_object"))); + fs::write(&obj, data).unwrap(); + cmd.add_object(&obj); + tempfiles_for_linked_objects.push(obj); + } } } } diff --git a/compiler/rustc_codegen_ssa/src/back/linker.rs b/compiler/rustc_codegen_ssa/src/back/linker.rs index 4e0dff0a38971..c9571f8f58eba 100644 --- a/compiler/rustc_codegen_ssa/src/back/linker.rs +++ b/compiler/rustc_codegen_ssa/src/back/linker.rs @@ -5,9 +5,7 @@ use std::path::{Path, PathBuf}; use std::{env, io, iter, mem, str}; use cc::windows_registry; -use object::read::archive::ArchiveFile; -use rustc_data_structures::fx::{FxHashSet, FxIndexMap, FxIndexSet}; -use rustc_data_structures::memmap::Mmap; +use rustc_data_structures::fx::{FxHashSet, FxIndexMap}; use rustc_hir::def_id::{CrateNum, LOCAL_CRATE}; use rustc_metadata::{ find_native_static_library, try_find_native_dynamic_library, try_find_native_static_library, @@ -1813,33 +1811,11 @@ fn exported_symbols_for_proc_macro_crate(tcx: TyCtxt<'_>) -> Vec { vec![proc_macro_decls_name, metadata_symbol_name] } -fn add_linked_objects( - archive_path: &Path, - linked_symbols: &mut FxHashSet, -) -> Option> { - let archive_map = unsafe { Mmap::map(File::open(&archive_path).unwrap()).unwrap() }; - let archive = 
ArchiveFile::parse(&*archive_map) - .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err)) - .unwrap(); - let Some(archive_symbols) = archive.symbols().unwrap() else { - return None; - }; - let mut offsets = FxIndexSet::default(); - for symbol in archive_symbols { - let symbol = symbol.unwrap(); - let name = std::str::from_utf8(symbol.name()).unwrap(); - if linked_symbols.remove(name) { - offsets.insert(symbol.offset().0); - } - } - Some(offsets) -} - pub(crate) fn linked_objects( tcx: TyCtxt<'_>, crate_type: CrateType, linked_symbols: &mut Vec<(String, SymbolExportKind)>, -) -> FxIndexMap> { +) -> FxIndexMap> { match crate_type { CrateType::Executable | CrateType::Cdylib | CrateType::Dylib => (), CrateType::Staticlib | CrateType::ProcMacro | CrateType::Rlib => { @@ -1852,14 +1828,21 @@ pub(crate) fn linked_objects( are_upstream_rust_objects_already_included(tcx.sess); let export_threshold = symbol_export::crates_export_threshold(&[crate_type]); for_each_exported_symbols_include_dep(tcx, crate_type, |exported_symbols, cnum| { + let exported_symbols = exported_symbols + .iter() + .filter(|(_, info)| info.level.is_below_threshold(export_threshold) || info.used); if cnum == LOCAL_CRATE { - // We don't know here if the symbols are undefined, so we add them all. - // Since the local crate is always linked directly to object files, `#[used]` works as expected. + // Since the local crate is always linked directly to object files, `#[used]` works as expected, + // we only need add undefined symbols. linked_symbols.extend( exported_symbols - .iter() - .filter(|(_, info)| { - info.level.is_below_threshold(export_threshold) || info.used + .filter(|(symbol, _)| match symbol { + ExportedSymbol::NonGeneric { cgu, .. } => cgu.is_none(), + ExportedSymbol::Generic(..) + | ExportedSymbol::DropGlue(..) + | ExportedSymbol::AsyncDropGlueCtorShim(..) => false, + ExportedSymbol::ThreadLocalShim(_def_id) => false, + ExportedSymbol::NoDefId(..) 
=> true, }) .map(|&(symbol, info)| { ( @@ -1872,35 +1855,33 @@ pub(crate) fn linked_objects( ); return; } - // TODO: let lto = upstream_rust_objects_already_included && !ignored_for_lto(tcx.sess, &codegen_results.crate_info, cnum); + // FIXME: should be `let lto = upstream_rust_objects_already_included && !ignored_for_lto(tcx.sess, &codegen_results.crate_info, cnum);` let lto = upstream_rust_objects_already_included; - if lto { - return; + let mut cgus = FxHashSet::default(); + for &(symbol, info) in exported_symbols { + match symbol { + ExportedSymbol::NonGeneric { cgu: Some(cgu), .. } => { + if !lto { + cgus.insert(cgu.as_str().to_string()); + } + } + ExportedSymbol::NonGeneric { cgu: None, .. } | ExportedSymbol::NoDefId(..) => { + // Unresolved symbols may come from external libraries. + linked_symbols.push(( + symbol_export::linking_symbol_name_for_instance_in_crate(tcx, symbol, cnum), + info.kind, + )); + } + ExportedSymbol::Generic(..) + | ExportedSymbol::DropGlue(..) + | ExportedSymbol::AsyncDropGlueCtorShim(..) + | ExportedSymbol::ThreadLocalShim(..) => {} + }; } - let symbols: Vec<_> = exported_symbols - .iter() - .filter(|(_, info)| info.level.is_below_threshold(export_threshold) || info.used) - .map(|&(symbol, info)| { - ( - symbol_export::linking_symbol_name_for_instance_in_crate(tcx, symbol, cnum), - info.kind, - ) - }) - .collect(); - if symbols.is_empty() { + if cgus.is_empty() { return; } - let used_crate_source = tcx.used_crate_source(cnum); - let cratepath = &used_crate_source.rlib.as_ref().unwrap().0; - let mut crate_linked_symbols: FxHashSet<_> = - symbols.iter().map(|(symbol, _)| symbol.to_string()).collect(); - if let Some(archive_offsets) = add_linked_objects(cratepath, &mut crate_linked_symbols) { - objects.insert(cnum, archive_offsets); - } - // Unresolved symbols may come from external libraries. 
- linked_symbols.extend( - symbols.into_iter().filter(|(symbol, _)| crate_linked_symbols.contains(symbol)), - ); + objects.insert(cnum, cgus); }); objects diff --git a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs index 459f4329d6e92..78c4819db7bd5 100644 --- a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs +++ b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs @@ -9,10 +9,12 @@ use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; use rustc_middle::middle::exported_symbols::{ ExportedSymbol, SymbolExportInfo, SymbolExportKind, SymbolExportLevel, metadata_symbol_name, }; +use rustc_middle::mir::mono::{CodegenUnit, MonoItem}; use rustc_middle::query::LocalCrate; use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, Ty, TyCtxt}; use rustc_middle::util::Providers; -use rustc_session::config::{CrateType, OomStrategy}; +use rustc_session::config::{CrateType, OomStrategy, OutputFilenames, OutputType}; +use rustc_span::Symbol; use rustc_target::callconv::Conv; use rustc_target::spec::{SanitizerSet, TlsModel}; use tracing::debug; @@ -168,6 +170,36 @@ fn is_reachable_non_generic_provider_extern(tcx: TyCtxt<'_>, def_id: DefId) -> b tcx.reachable_non_generics(def_id.krate).contains_key(&def_id) } +fn find_codegen_unit<'tcx>( + tcx: TyCtxt<'tcx>, + codegen_units: &'tcx [CodegenUnit<'tcx>], + outputs: &'tcx OutputFilenames, + def_id: DefId, +) -> Option { + if !tcx.is_codegened_item(def_id) { + return None; + } + let item = if tcx.is_static(def_id) { + MonoItem::Static(def_id) + } else { + MonoItem::Fn(Instance::mono(tcx, def_id)) + }; + codegen_units.iter().find_map(|cgu| { + if cgu.contains_item(&item) { + Some(Symbol::intern( + outputs + .temp_path(OutputType::Object, Some(cgu.name().as_str())) + .file_name() + .unwrap() + .to_str() + .unwrap(), + )) + } else { + None + } + }) +} + fn exported_symbols_provider_local( tcx: TyCtxt<'_>, _: LocalCrate, @@ -182,8 +214,20 
@@ fn exported_symbols_provider_local( tcx.reachable_non_generics(LOCAL_CRATE).to_sorted(&hcx, true) }); - let mut symbols: Vec<_> = - sorted.iter().map(|&(&def_id, &info)| (ExportedSymbol::NonGeneric(def_id), info)).collect(); + let outputs = tcx.output_filenames(()); + let codegen_units = tcx.collect_and_partition_mono_items(()).codegen_units; + let mut symbols: Vec<_> = sorted + .iter() + .map(|&(&def_id, &info)| { + ( + ExportedSymbol::NonGeneric { + def_id, + cgu: find_codegen_unit(tcx, codegen_units, outputs, def_id), + }, + info, + ) + }) + .collect(); // Export TLS shims if !tcx.sess.target.dll_tls_export { @@ -433,7 +477,7 @@ fn upstream_monomorphizations_provider( continue; } } - ExportedSymbol::NonGeneric(..) + ExportedSymbol::NonGeneric { .. } | ExportedSymbol::ThreadLocalShim(..) | ExportedSymbol::NoDefId(..) => { // These are no monomorphizations @@ -545,7 +589,7 @@ pub(crate) fn symbol_name_for_instance_in_crate<'tcx>( // This is something instantiated in an upstream crate, so we have to use // the slower (because uncached) version of computing the symbol name. match symbol { - ExportedSymbol::NonGeneric(def_id) => { + ExportedSymbol::NonGeneric { def_id, .. } => { rustc_symbol_mangling::symbol_name_for_instance_in_crate( tcx, Instance::mono(tcx, def_id), @@ -590,12 +634,12 @@ fn calling_convention_for_symbol<'tcx>( symbol: ExportedSymbol<'tcx>, ) -> (Conv, &'tcx [rustc_target::callconv::ArgAbi<'tcx, Ty<'tcx>>]) { let instance = match symbol { - ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _) + ExportedSymbol::NonGeneric { def_id, .. } | ExportedSymbol::Generic(def_id, _) if tcx.is_static(def_id) => { None } - ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)), + ExportedSymbol::NonGeneric { def_id, .. 
} => Some(Instance::mono(tcx, def_id)), ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)), // DropGlue always use the Rust calling convention and thus follow the target's default // symbol decoration scheme. @@ -711,7 +755,7 @@ fn maybe_emutls_symbol_name<'tcx>( undecorated: &str, ) -> Option { if matches!(tcx.sess.tls_model(), TlsModel::Emulated) - && let ExportedSymbol::NonGeneric(def_id) = symbol + && let ExportedSymbol::NonGeneric { def_id, .. } = symbol && tcx.is_thread_local_static(def_id) { // When using emutls, LLVM will add the `__emutls_v.` prefix to thread local symbols, diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs index 356d4628af8c1..916215cf304e2 100644 --- a/compiler/rustc_codegen_ssa/src/lib.rs +++ b/compiler/rustc_codegen_ssa/src/lib.rs @@ -27,7 +27,7 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; use rustc_ast as ast; -use rustc_data_structures::fx::{FxHashSet, FxIndexMap, FxIndexSet}; +use rustc_data_structures::fx::{FxHashSet, FxIndexMap}; use rustc_data_structures::unord::UnordMap; use rustc_hir::CRATE_HIR_ID; use rustc_hir::def_id::CrateNum; @@ -194,7 +194,7 @@ pub struct CrateInfo { pub crate_types: Vec, pub exported_symbols: UnordMap>, pub linked_symbols: FxIndexMap>, - pub linked_objects: FxIndexMap>>, + pub linked_objects: FxIndexMap>>, pub local_crate_name: Symbol, pub compiler_builtins: Option, pub profiler_runtime: Option, diff --git a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs index 776b081a4630f..c207dfc5cb246 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs @@ -367,7 +367,7 @@ provide! 
{ tcx, def_id, other, cdata, .exported_symbols(cdata.cnum) .iter() .filter_map(|&(exported_symbol, export_info)| { - if let ExportedSymbol::NonGeneric(def_id) = exported_symbol { + if let ExportedSymbol::NonGeneric { def_id, .. } = exported_symbol { Some((def_id, export_info)) } else { None diff --git a/compiler/rustc_middle/src/middle/exported_symbols.rs b/compiler/rustc_middle/src/middle/exported_symbols.rs index 0bfbd39879747..ebf09ac6eeaa0 100644 --- a/compiler/rustc_middle/src/middle/exported_symbols.rs +++ b/compiler/rustc_middle/src/middle/exported_symbols.rs @@ -1,5 +1,6 @@ use rustc_hir::def_id::{DefId, LOCAL_CRATE}; use rustc_macros::{Decodable, Encodable, HashStable, TyDecodable, TyEncodable}; +use rustc_span::Symbol; use crate::ty::{self, GenericArgsRef, Ty, TyCtxt}; @@ -40,7 +41,7 @@ pub struct SymbolExportInfo { #[derive(Eq, PartialEq, Debug, Copy, Clone, TyEncodable, TyDecodable, HashStable)] pub enum ExportedSymbol<'tcx> { - NonGeneric(DefId), + NonGeneric { def_id: DefId, cgu: Option<Symbol> }, Generic(DefId, GenericArgsRef<'tcx>), DropGlue(Ty<'tcx>), AsyncDropGlueCtorShim(Ty<'tcx>), @@ -53,7 +54,9 @@ impl<'tcx> ExportedSymbol<'tcx> { /// local crate. pub fn symbol_name_for_local_instance(&self, tcx: TyCtxt<'tcx>) -> ty::SymbolName<'tcx> { match *self { - ExportedSymbol::NonGeneric(def_id) => tcx.symbol_name(ty::Instance::mono(tcx, def_id)), + ExportedSymbol::NonGeneric { def_id, .. 
} => { + tcx.symbol_name(ty::Instance::mono(tcx, def_id)) + } ExportedSymbol::Generic(def_id, args) => { tcx.symbol_name(ty::Instance::new(def_id, args)) } diff --git a/src/tools/miri/src/bin/miri.rs b/src/tools/miri/src/bin/miri.rs index 6b051da1b5a64..e90705830a7f4 100644 --- a/src/tools/miri/src/bin/miri.rs +++ b/src/tools/miri/src/bin/miri.rs @@ -29,8 +29,8 @@ use std::num::NonZero; use std::ops::Range; use std::path::PathBuf; use std::str::FromStr; -use std::sync::{Arc, Once}; use std::sync::atomic::{AtomicI32, AtomicU32, Ordering}; +use std::sync::{Arc, Once}; use miri::{ BacktraceStyle, BorrowTrackerMethod, MiriConfig, MiriEntryFnType, ProvenanceMode, RetagFields, @@ -81,7 +81,7 @@ fn entry_fn(tcx: TyCtxt<'_>) -> (DefId, MiriEntryFnType) { let sym = tcx.exported_symbols(LOCAL_CRATE).iter().find_map(|(sym, _)| { if sym.symbol_name_for_local_instance(tcx).name == "miri_start" { Some(sym) } else { None } }); - if let Some(ExportedSymbol::NonGeneric(id)) = sym { + if let Some(ExportedSymbol::NonGeneric { def_id: id, .. }) = sym { let start_def_id = id.expect_local(); let start_span = tcx.def_span(start_def_id); @@ -265,7 +265,7 @@ impl rustc_driver::Callbacks for MiriBeRustCompilerCalls { || codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::USED_LINKER) { Some(( - ExportedSymbol::NonGeneric(local_def_id.to_def_id()), + ExportedSymbol::NonGeneric { def_id: local_def_id.to_def_id(), cgu: None }, // Some dummy `SymbolExportInfo` here. We only use // `exported_symbols` in shims/foreign_items.rs and the export info // is ignored. diff --git a/src/tools/miri/src/helpers.rs b/src/tools/miri/src/helpers.rs index a26f12cdfb1e2..a6c86f934d091 100644 --- a/src/tools/miri/src/helpers.rs +++ b/src/tools/miri/src/helpers.rs @@ -162,7 +162,7 @@ pub fn iter_exported_symbols<'tcx>( // We can ignore `_export_info` here: we are a Rust crate, and everything is exported // from a Rust crate. 
for &(symbol, _export_info) in tcx.exported_symbols(cnum) { - if let ExportedSymbol::NonGeneric(def_id) = symbol { + if let ExportedSymbol::NonGeneric { def_id, .. } = symbol { f(cnum, def_id)?; } } From bb7755467724233fd955bc431adb3df2123fc265 Mon Sep 17 00:00:00 2001 From: DianQK Date: Sun, 23 Feb 2025 19:20:33 +0800 Subject: [PATCH 4/4] No need to export C and compiler-builtins symbols for the executable --- compiler/rustc_codegen_ssa/src/back/linker.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/back/linker.rs b/compiler/rustc_codegen_ssa/src/back/linker.rs index c9571f8f58eba..72aedf212dbd4 100644 --- a/compiler/rustc_codegen_ssa/src/back/linker.rs +++ b/compiler/rustc_codegen_ssa/src/back/linker.rs @@ -1828,9 +1828,11 @@ pub(crate) fn linked_objects( are_upstream_rust_objects_already_included(tcx.sess); let export_threshold = symbol_export::crates_export_threshold(&[crate_type]); for_each_exported_symbols_include_dep(tcx, crate_type, |exported_symbols, cnum| { - let exported_symbols = exported_symbols - .iter() - .filter(|(_, info)| info.level.is_below_threshold(export_threshold) || info.used); + let exported_symbols = exported_symbols.iter().filter(|(_, info)| { + (!matches!(crate_type, CrateType::Executable) + && info.level.is_below_threshold(export_threshold)) + || info.used + }); if cnum == LOCAL_CRATE { // Since the local crate is always linked directly to object files, `#[used]` works as expected, // we only need add undefined symbols. @@ -1855,7 +1857,9 @@ pub(crate) fn linked_objects( ); return; } - // FIXME: should be `let lto = upstream_rust_objects_already_included && !ignored_for_lto(tcx.sess, &codegen_results.crate_info, cnum);` + if matches!(crate_type, CrateType::Executable) && tcx.is_compiler_builtins(cnum) { + return; + } let lto = upstream_rust_objects_already_included; let mut cgus = FxHashSet::default(); for &(symbol, info) in exported_symbols {