From ba189788758a68a921b349e07109c18a51f18bda Mon Sep 17 00:00:00 2001 From: Rich Kadel Date: Wed, 5 Aug 2020 22:53:11 -0700 Subject: [PATCH] LLVM IR coverage encoding aligns closer to Clang's I found some areas for improvement while attempting to debug the SegFault issue when running rust programs compiled using MSVC, with coverage instrumentation. I discovered that LLVM's coverage writer was generating incomplete function name variable names (that's not a typo: the name of the variable that holds a function name). The existing implementation used one-up numbers to distinguish variables, and correcting the names did not fix the MSVC coverage bug, but the fix in this PR makes the names and resulting LLVM IR easier to follow and more consistent with Clang's implementation. I also changed the way the `-Zinstrument-coverage` option is supported in symbol_export.rs. The original implementation was incorrect, and the corrected version matches the handling for `-Zprofile-generate`, as it turns out. (An argument could be made that maybe `-Zinstrument-coverage` should automatically enable `-Cprofile-generate`. In fact, if `-Cprofile-generate` is analagous to Clang's `-fprofile-generate`, as some documentation implies, Clang always requires this flag for its implementation of source-based code coverage. This would require a little more validation, and if implemented, would probably require updating some of the user-facing messages related to `-Cprofile-generate` to not be so specific to the PGO use case.) None of these changes fixed the MSVC coverage problems, but they should still be welcome improvements. Lastly, I added some additional FIXME comments in instrument_coverage.rs describing issues I found with the generated LLVM IR that would be resolved if the coverage instrumentation is injected with a `Statement` instead of as a new `BasicBlock`. I describe seven advantages of this change, but it requires some discussion before making a change like this. --- src/librustc_codegen_llvm/coverageinfo/mod.rs | 12 ++- src/librustc_codegen_llvm/intrinsic.rs | 14 ++-- src/librustc_codegen_llvm/llvm/ffi.rs | 2 + .../back/symbol_export.rs | 15 +--- .../traits/coverageinfo.rs | 2 + .../transform/instrument_coverage.rs | 15 ++++ src/rustllvm/CoverageMappingWrapper.cpp | 5 ++ .../instrument-coverage/Makefile | 78 +++++++++++++++---- .../expected_export_coverage.json | 2 +- .../filecheck-patterns.txt | 51 ++++++++++++ .../{main.rs => testprog.rs} | 0 .../typical_show_coverage.txt | 4 +- 12 files changed, 161 insertions(+), 39 deletions(-) create mode 100644 src/test/run-make-fulldeps/instrument-coverage/filecheck-patterns.txt rename src/test/run-make-fulldeps/instrument-coverage/{main.rs => testprog.rs} (100%) diff --git a/src/librustc_codegen_llvm/coverageinfo/mod.rs b/src/librustc_codegen_llvm/coverageinfo/mod.rs index 7b864e499d1de..431acbb1f7e16 100644 --- a/src/librustc_codegen_llvm/coverageinfo/mod.rs +++ b/src/librustc_codegen_llvm/coverageinfo/mod.rs @@ -8,7 +8,7 @@ use llvm::coverageinfo::CounterMappingRegion; use log::debug; use rustc_codegen_ssa::coverageinfo::map::{CounterExpression, ExprKind, FunctionCoverage, Region}; use rustc_codegen_ssa::traits::{ - BaseTypeMethods, CoverageInfoBuilderMethods, CoverageInfoMethods, StaticMethods, + BaseTypeMethods, CoverageInfoBuilderMethods, CoverageInfoMethods, MiscMethods, StaticMethods, }; use rustc_data_structures::fx::FxHashMap; use rustc_llvm::RustString; @@ -44,6 +44,16 @@ impl CoverageInfoMethods for CodegenCx<'ll, 'tcx> { } impl CoverageInfoBuilderMethods<'tcx> for Builder<'a, 'll, 'tcx> { + /// Calls llvm::createPGOFuncNameVar() with the given function instance's mangled function name. + /// The LLVM API returns an llvm::GlobalVariable containing the function name, with the specific + /// variable name and linkage required by LLVM InstrProf source-based coverage instrumentation. + fn create_pgo_func_name_var(&self, instance: Instance<'tcx>) -> Self::Value { + let llfn = self.cx.get_fn(instance); + let mangled_fn_name = CString::new(self.tcx.symbol_name(instance).name) + .expect("error converting function name to C string"); + unsafe { llvm::LLVMRustCoverageCreatePGOFuncNameVar(llfn, mangled_fn_name.as_ptr()) } + } + fn add_counter_region( &mut self, instance: Instance<'tcx>, diff --git a/src/librustc_codegen_llvm/intrinsic.rs b/src/librustc_codegen_llvm/intrinsic.rs index dfd5104a31f6d..14aec1ff5e19e 100644 --- a/src/librustc_codegen_llvm/intrinsic.rs +++ b/src/librustc_codegen_llvm/intrinsic.rs @@ -215,19 +215,19 @@ impl IntrinsicCallMethods<'tcx> for Builder<'a, 'll, 'tcx> { self.call(llfn, &[], None) } sym::count_code_region => { - let coverageinfo = tcx.coverageinfo(caller_instance.def_id()); - let mangled_fn = tcx.symbol_name(caller_instance); - let (mangled_fn_name, _len_val) = self.const_str(Symbol::intern(mangled_fn.name)); - let num_counters = self.const_u32(coverageinfo.num_counters); use coverage::count_code_region_args::*; + let coverageinfo = tcx.coverageinfo(caller_instance.def_id()); + + let fn_name = self.create_pgo_func_name_var(caller_instance); let hash = args[FUNCTION_SOURCE_HASH].immediate(); + let num_counters = self.const_u32(coverageinfo.num_counters); let index = args[COUNTER_ID].immediate(); debug!( "translating Rust intrinsic `count_code_region()` to LLVM intrinsic: \ - instrprof.increment(fn_name={}, hash={:?}, num_counters={:?}, index={:?})", - mangled_fn.name, hash, num_counters, index, + instrprof.increment(fn_name={:?}, hash={:?}, num_counters={:?}, index={:?})", + fn_name, hash, num_counters, index, ); - self.instrprof_increment(mangled_fn_name, hash, num_counters, index) + self.instrprof_increment(fn_name, hash, num_counters, index) } sym::va_start => self.va_start(args[0].immediate()), sym::va_end => self.va_end(args[0].immediate()), diff --git a/src/librustc_codegen_llvm/llvm/ffi.rs b/src/librustc_codegen_llvm/llvm/ffi.rs index eb7dc827f9391..a1c1ed9856d67 100644 --- a/src/librustc_codegen_llvm/llvm/ffi.rs +++ b/src/librustc_codegen_llvm/llvm/ffi.rs @@ -1802,6 +1802,8 @@ extern "C" { BufferOut: &RustString, ); + pub fn LLVMRustCoverageCreatePGOFuncNameVar(F: &'a Value, FuncName: *const c_char) + -> &'a Value; pub fn LLVMRustCoverageComputeHash(Name: *const c_char) -> u64; #[allow(improper_ctypes)] diff --git a/src/librustc_codegen_ssa/back/symbol_export.rs b/src/librustc_codegen_ssa/back/symbol_export.rs index 7d742e7a7afd2..e7c789ad21026 100644 --- a/src/librustc_codegen_ssa/back/symbol_export.rs +++ b/src/librustc_codegen_ssa/back/symbol_export.rs @@ -190,7 +190,9 @@ fn exported_symbols_provider_local( } } - if tcx.sess.opts.cg.profile_generate.enabled() { + if tcx.sess.opts.debugging_opts.instrument_coverage + || tcx.sess.opts.cg.profile_generate.enabled() + { // These are weak symbols that point to the profile version and the // profile name, which need to be treated as exported so LTO doesn't nix // them. @@ -203,17 +205,6 @@ fn exported_symbols_provider_local( })); } - if tcx.sess.opts.debugging_opts.instrument_coverage { - // Similar to PGO profiling, preserve symbols used by LLVM InstrProf coverage profiling. - const COVERAGE_WEAK_SYMBOLS: [&str; 3] = - ["__llvm_profile_filename", "__llvm_coverage_mapping", "__llvm_covmap"]; - - symbols.extend(COVERAGE_WEAK_SYMBOLS.iter().map(|sym| { - let exported_symbol = ExportedSymbol::NoDefId(SymbolName::new(tcx, sym)); - (exported_symbol, SymbolExportLevel::C) - })); - } - if tcx.sess.opts.debugging_opts.sanitizer.contains(SanitizerSet::MEMORY) { // Similar to profiling, preserve weak msan symbol during LTO. const MSAN_WEAK_SYMBOLS: [&str; 2] = ["__msan_track_origins", "__msan_keep_going"]; diff --git a/src/librustc_codegen_ssa/traits/coverageinfo.rs b/src/librustc_codegen_ssa/traits/coverageinfo.rs index 2b5878f46bc43..5602599b0c25b 100644 --- a/src/librustc_codegen_ssa/traits/coverageinfo.rs +++ b/src/librustc_codegen_ssa/traits/coverageinfo.rs @@ -7,6 +7,8 @@ pub trait CoverageInfoMethods: BackendTypes { } pub trait CoverageInfoBuilderMethods<'tcx>: BackendTypes { + fn create_pgo_func_name_var(&self, instance: Instance<'tcx>) -> Self::Value; + fn add_counter_region( &mut self, instance: Instance<'tcx>, diff --git a/src/librustc_mir/transform/instrument_coverage.rs b/src/librustc_mir/transform/instrument_coverage.rs index 5b2954dd5b0a3..500d66ece0676 100644 --- a/src/librustc_mir/transform/instrument_coverage.rs +++ b/src/librustc_mir/transform/instrument_coverage.rs @@ -295,6 +295,21 @@ impl<'a, 'tcx> Instrumentor<'a, 'tcx> { let (file_name, start_line, start_col, end_line, end_col) = self.code_region(&span); + // FIXME(richkadel): Note that `const_str()` results in the creation of an `Allocation` to + // hold one copy of each unique filename. It looks like that `Allocation` may translate into + // the creation of an `@alloc` in LLVM IR that is never actually used by runtime code. + // + // Example LLVM IR: + // + // @alloc4 = private unnamed_addr constant <{ [43 x i8] }> \ + // <{ [43 x i8] c"C:\\msys64\\home\\richkadel\\rust\\rust_basic.rs" }>, align 1 + // + // Can I flag the alloc as something not to be added to codegen? Or somehow remove it before + // it gets added to the LLVM IR? Do we need some kind of reference counting to know it's + // not used by any runtime code? + // + // This question is moot if I convert the Call Terminators to Statements, I believe: + // https://rust-lang.zulipchat.com/#narrow/stream/233931-t-compiler.2Fmajor-changes/topic/Implement.20LLVM-compatible.20source-based.20cod.20compiler-team.23278/near/206731748 args.push(self.const_str(&file_name, inject_at)); args.push(self.const_u32(start_line, inject_at)); args.push(self.const_u32(start_col, inject_at)); diff --git a/src/rustllvm/CoverageMappingWrapper.cpp b/src/rustllvm/CoverageMappingWrapper.cpp index 7c8481540aae4..4d15e31df15a3 100644 --- a/src/rustllvm/CoverageMappingWrapper.cpp +++ b/src/rustllvm/CoverageMappingWrapper.cpp @@ -38,6 +38,11 @@ extern "C" void LLVMRustCoverageWriteMappingToBuffer( CoverageMappingWriter.write(OS); } +extern "C" LLVMValueRef LLVMRustCoverageCreatePGOFuncNameVar(LLVMValueRef F, const char *FuncName) { + StringRef FuncNameRef(FuncName); + return wrap(createPGOFuncNameVar(*cast(unwrap(F)), FuncNameRef)); +} + extern "C" uint64_t LLVMRustCoverageComputeHash(const char *Name) { StringRef NameRef(Name); return IndexedInstrProf::ComputeHash(NameRef); diff --git a/src/test/run-make-fulldeps/instrument-coverage/Makefile b/src/test/run-make-fulldeps/instrument-coverage/Makefile index df47305b547a8..4392cfec080dc 100644 --- a/src/test/run-make-fulldeps/instrument-coverage/Makefile +++ b/src/test/run-make-fulldeps/instrument-coverage/Makefile @@ -3,55 +3,101 @@ # FIXME(richkadel): Debug the following problem, and reenable on Windows (by # removing the `# ignore-msvc` directive above). The current implementation -# generates a segfault when running the instrumented `main` executable, -# after the `main` program code executes, but before the process terminates. -# This most likely points to a problem generating the LLVM "main.profraw" +# generates a segfault when running the instrumented `testprog` executable, +# after the `main()` function completes, but before the process terminates. +# This most likely points to a problem generating the LLVM "testprog.profraw" # file. -include ../tools.mk +UNAME = $(shell uname) + +ifeq ($(UNAME),Darwin) + INSTR_PROF_DATA_SUFFIX=,regular,live_support + DATA_SECTION_PREFIX=__DATA, + LLVM_COV_SECTION_PREFIX=__LLVM_COV, +else + INSTR_PROF_DATA_SUFFIX= + DATA_SECTION_PREFIX= + LLVM_COV_SECTION_PREFIX= +endif + # This test makes sure that LLVM coverage maps are genereated in LLVM IR. COMMON_FLAGS=-Zinstrument-coverage all: # Compile the test program with instrumentation, and also generate LLVM IR - $(RUSTC) $(COMMON_FLAGS) main.rs + $(RUSTC) $(COMMON_FLAGS) testprog.rs \ + --emit=link,llvm-ir + + # check the LLVM IR +ifdef IS_WIN32 + cat "$(TMPDIR)"/testprog.ll | "$(LLVM_FILECHECK)" filecheck-patterns.txt \ + -check-prefixes=CHECK,WIN32 \ + -DPRIVATE_GLOBAL="internal global" \ + -DINSTR_PROF_DATA=".lprfd$$M" \ + -DINSTR_PROF_NAME=".lprfn$$M" \ + -DINSTR_PROF_CNTS=".lprfc$$M" \ + -DINSTR_PROF_VALS=".lprfv$$M" \ + -DINSTR_PROF_VNODES=".lprfnd$$M" \ + -DINSTR_PROF_COVMAP=".lcovmap$$M" \ + -DINSTR_PROF_ORDERFILE=".lorderfile$$M" +else + cat "$(TMPDIR)"/testprog.ll | "$(LLVM_FILECHECK)" filecheck-patterns.txt \ + -check-prefixes=CHECK \ + -DPRIVATE_GLOBAL="private global" \ + -DINSTR_PROF_DATA="$(DATA_SECTION_PREFIX)__llvm_prf_data$(INSTR_PROF_DATA_SUFFIX)" \ + -DINSTR_PROF_NAME="$(DATA_SECTION_PREFIX)__llvm_prf_names" \ + -DINSTR_PROF_CNTS="$(DATA_SECTION_PREFIX)__llvm_prf_cnts" \ + -DINSTR_PROF_VALS="$(DATA_SECTION_PREFIX)__llvm_prf_vals" \ + -DINSTR_PROF_VNODES="$(DATA_SECTION_PREFIX)__llvm_prf_vnds" \ + -DINSTR_PROF_COVMAP="$(LLVM_COV_SECTION_PREFIX)__llvm_covmap" \ + -DINSTR_PROF_ORDERFILE="$(DATA_SECTION_PREFIX)__llvm_orderfile" +endif # Run it in order to generate some profiling data, # with `LLVM_PROFILE_FILE=` environment variable set to # output the coverage stats for this run. - LLVM_PROFILE_FILE="$(TMPDIR)"/main.profraw \ - $(call RUN,main) + LLVM_PROFILE_FILE="$(TMPDIR)"/testprog.profraw \ + $(call RUN,testprog) # Postprocess the profiling data so it can be used by the llvm-cov tool "$(LLVM_BIN_DIR)"/llvm-profdata merge --sparse \ - "$(TMPDIR)"/main.profraw \ - -o "$(TMPDIR)"/main.profdata + "$(TMPDIR)"/testprog.profraw \ + -o "$(TMPDIR)"/testprog.profdata # Generate a coverage report using `llvm-cov show`. The output ordering # can be non-deterministic, so ignore the return status. If the test fails # when comparing the JSON `export`, the `show` output may be useful when # debugging. "$(LLVM_BIN_DIR)"/llvm-cov show \ - --Xdemangler="$(RUST_DEMANGLER)" \ - --show-line-counts-or-regions \ - --instr-profile="$(TMPDIR)"/main.profdata \ - $(call BIN,"$(TMPDIR)"/main) \ + --Xdemangler="$(RUST_DEMANGLER)" \ + --show-line-counts-or-regions \ + --instr-profile="$(TMPDIR)"/testprog.profdata \ + $(call BIN,"$(TMPDIR)"/testprog) \ > "$(TMPDIR)"/actual_show_coverage.txt +ifdef RUSTC_BLESS_TEST + cp "$(TMPDIR)"/actual_show_coverage.txt typical_show_coverage.txt +else # Compare the show coverage output $(DIFF) typical_show_coverage.txt "$(TMPDIR)"/actual_show_coverage.txt || \ - >&2 echo 'diff failed for `llvm-cov show` (might not be an error)' + >&2 echo 'diff failed for `llvm-cov show` (might not be an error)' +endif # Generate a coverage report in JSON, using `llvm-cov export`, and fail if # there are differences from the expected output. "$(LLVM_BIN_DIR)"/llvm-cov export \ - --summary-only \ - --instr-profile="$(TMPDIR)"/main.profdata \ - $(call BIN,"$(TMPDIR)"/main) \ + --summary-only \ + --instr-profile="$(TMPDIR)"/testprog.profdata \ + $(call BIN,"$(TMPDIR)"/testprog) \ | "$(PYTHON)" prettify_json.py \ > "$(TMPDIR)"/actual_export_coverage.json +ifdef RUSTC_BLESS_TEST + cp "$(TMPDIR)"/actual_export_coverage.json expected_export_coverage.json +else # Check that the exported JSON coverage data matches what we expect $(DIFF) expected_export_coverage.json "$(TMPDIR)"/actual_export_coverage.json +endif diff --git a/src/test/run-make-fulldeps/instrument-coverage/expected_export_coverage.json b/src/test/run-make-fulldeps/instrument-coverage/expected_export_coverage.json index 9d739a89114e2..5881cf4b7dbbc 100644 --- a/src/test/run-make-fulldeps/instrument-coverage/expected_export_coverage.json +++ b/src/test/run-make-fulldeps/instrument-coverage/expected_export_coverage.json @@ -3,7 +3,7 @@ { "files": [ { - "filename": "main.rs", + "filename": "testprog.rs", "summary": { "functions": { "count": 7, diff --git a/src/test/run-make-fulldeps/instrument-coverage/filecheck-patterns.txt b/src/test/run-make-fulldeps/instrument-coverage/filecheck-patterns.txt new file mode 100644 index 0000000000000..5a7cc9a188202 --- /dev/null +++ b/src/test/run-make-fulldeps/instrument-coverage/filecheck-patterns.txt @@ -0,0 +1,51 @@ +# Check for metadata, variables, declarations, and function definitions injected +# into LLVM IR when compiling with -Zinstrument-coverage. + +WIN32: $__llvm_profile_runtime_user = comdat any + +CHECK: @__llvm_coverage_mapping = internal constant +CHECK-SAME: section "[[INSTR_PROF_COVMAP]]", align 8 + +WIN32: @__llvm_profile_runtime = external global i32 + +CHECK: @__profc__R{{[a-zA-Z0-9_]+}}testprog14will_be_called = [[PRIVATE_GLOBAL]] +CHECK-SAME: section "[[INSTR_PROF_CNTS]]", align 8 + +CHECK: @__profd__R{{[a-zA-Z0-9_]+}}testprog14will_be_called = [[PRIVATE_GLOBAL]] +CHECK-SAME: @__profc__R{{[a-zA-Z0-9_]+}}testprog14will_be_called, +CHECK-SAME: ()* @_R{{[a-zA-Z0-9_]+}}testprog14will_be_called to i8*), +CHECK-SAME: section "[[INSTR_PROF_DATA]]", align 8 + +CHECK: @__profc__R{{[a-zA-Z0-9_]+}}testprog4main = [[PRIVATE_GLOBAL]] +CHECK-SAME: section "[[INSTR_PROF_CNTS]]", align 8 + +CHECK: @__profd__R{{[a-zA-Z0-9_]+}}testprog4main = [[PRIVATE_GLOBAL]] +CHECK-SAME: @__profc__R{{[a-zA-Z0-9_]+}}testprog4main, +CHECK-SAME: ()* @_R{{[a-zA-Z0-9_]+}}testprog4main to i8*), +CHECK-SAME: section "[[INSTR_PROF_DATA]]", align 8 + +CHECK: @__llvm_prf_nm = private constant +CHECK-SAME: section "[[INSTR_PROF_NAME]]", align 1 + +CHECK: @llvm.used = appending global +CHECK-SAME: i8* bitcast ({ {{.*}} }* @__llvm_coverage_mapping to i8*) +WIN32-SAME: i8* bitcast (i32 ()* @__llvm_profile_runtime_user to i8*) +CHECK-SAME: i8* bitcast ({ {{.*}} }* @__profd__R{{[a-zA-Z0-9_]*}}testprog4main to i8*) +CHECK-SAME: i8* getelementptr inbounds ({{.*}}* @__llvm_prf_nm, i32 0, i32 0) +CHECK-SAME: section "llvm.metadata" + +CHECK: define hidden { {{.*}} } @_R{{[a-zA-Z0-9_]+}}testprog14will_be_called() unnamed_addr #{{[0-9]+}} { +CHECK-NEXT: start: +CHECK-NOT: bb{{[0-9]+}}: +CHECK: %pgocount = load i64, i64* getelementptr inbounds +CHECK-SAME: * @__profc__R{{[a-zA-Z0-9_]+}}testprog14will_be_called, + +CHECK: declare void @llvm.instrprof.increment(i8*, i64, i32, i32) #[[LLVM_INSTRPROF_INCREMENT_ATTR:[0-9]+]] + +WIN32: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() #[[LLVM_PROFILE_RUNTIME_USER_ATTR:[0-9]+]] comdat { +WIN32-NEXT: %1 = load i32, i32* @__llvm_profile_runtime +WIN32-NEXT: ret i32 %1 +WIN32-NEXT: } + +CHECK: attributes #[[LLVM_INSTRPROF_INCREMENT_ATTR]] = { nounwind } +WIN32: attributes #[[LLVM_PROFILE_RUNTIME_USER_ATTR]] = { noinline } \ No newline at end of file diff --git a/src/test/run-make-fulldeps/instrument-coverage/main.rs b/src/test/run-make-fulldeps/instrument-coverage/testprog.rs similarity index 100% rename from src/test/run-make-fulldeps/instrument-coverage/main.rs rename to src/test/run-make-fulldeps/instrument-coverage/testprog.rs diff --git a/src/test/run-make-fulldeps/instrument-coverage/typical_show_coverage.txt b/src/test/run-make-fulldeps/instrument-coverage/typical_show_coverage.txt index 9c593d0809d48..ae123afff0400 100644 --- a/src/test/run-make-fulldeps/instrument-coverage/typical_show_coverage.txt +++ b/src/test/run-make-fulldeps/instrument-coverage/typical_show_coverage.txt @@ -25,14 +25,14 @@ 25| 2| } 26| 2|} ------------------ - | main[317d481089b8c8fe]::wrap_with::: + | testprog[317d481089b8c8fe]::wrap_with::: | 22| 1|{ | 23| 1| if should_wrap { | 24| 1| wrapper(&inner) | 25| 1| } | 26| 1|} ------------------ - | main[317d481089b8c8fe]::wrap_with::: + | testprog[317d481089b8c8fe]::wrap_with::: | 22| 1|{ | 23| 1| if should_wrap { | 24| 1| wrapper(&inner)