rustc_codegen_llvm/coverageinfo/
mapgen.rs

1use std::sync::Arc;
2
3use itertools::Itertools;
4use rustc_abi::Align;
5use rustc_codegen_ssa::traits::{
6    BaseTypeCodegenMethods, ConstCodegenMethods, StaticCodegenMethods,
7};
8use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
9use rustc_hir::def_id::{DefId, LocalDefId};
10use rustc_index::IndexVec;
11use rustc_middle::mir;
12use rustc_middle::mir::mono::MonoItemPartitions;
13use rustc_middle::ty::{self, TyCtxt};
14use rustc_session::RemapFileNameExt;
15use rustc_session::config::RemapPathScopeComponents;
16use rustc_span::def_id::DefIdSet;
17use rustc_span::{SourceFile, StableSourceFileId};
18use tracing::debug;
19
20use crate::common::CodegenCx;
21use crate::coverageinfo::llvm_cov;
22use crate::coverageinfo::mapgen::covfun::prepare_covfun_record;
23use crate::llvm;
24
25mod covfun;
26mod spans;
27
28/// Generates and exports the coverage map, which is embedded in special
29/// linker sections in the final binary.
30///
31/// Those sections are then read and understood by LLVM's `llvm-cov` tool,
32/// which is distributed in the `llvm-tools` rustup component.
33pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
34    let tcx = cx.tcx;
35
36    // Ensure that LLVM is using a version of the coverage mapping format that
37    // agrees with our Rust-side code. Expected versions (encoded as n-1) are:
38    // - `CovMapVersion::Version7` (6) used by LLVM 18-19
39    let covmap_version = {
40        let llvm_covmap_version = llvm_cov::mapping_version();
41        let expected_versions = 6..=6;
42        assert!(
43            expected_versions.contains(&llvm_covmap_version),
44            "Coverage mapping version exposed by `llvm-wrapper` is out of sync; \
45            expected {expected_versions:?} but was {llvm_covmap_version}"
46        );
47        // This is the version number that we will embed in the covmap section:
48        llvm_covmap_version
49    };
50
51    debug!("Generating coverage map for CodegenUnit: `{}`", cx.codegen_unit.name());
52
53    // FIXME(#132395): Can this be none even when coverage is enabled?
54    let instances_used = match cx.coverage_cx {
55        Some(ref cx) => cx.instances_used.borrow(),
56        None => return,
57    };
58
59    // The order of entries in this global file table needs to be deterministic,
60    // and ideally should also be independent of the details of stable-hashing,
61    // because coverage tests snapshots (`.cov-map`) can observe the order and
62    // would need to be re-blessed if it changes. As long as those requirements
63    // are satisfied, the order can be arbitrary.
64    let mut global_file_table = GlobalFileTable::new();
65
66    let mut covfun_records = instances_used
67        .iter()
68        .copied()
69        // Sort by symbol name, so that the global file table is built in an
70        // order that doesn't depend on the stable-hash-based order in which
71        // instances were visited during codegen.
72        .sorted_by_cached_key(|&instance| tcx.symbol_name(instance).name)
73        .filter_map(|instance| prepare_covfun_record(tcx, &mut global_file_table, instance, true))
74        .collect::<Vec<_>>();
75
76    // In a single designated CGU, also prepare covfun records for functions
77    // in this crate that were instrumented for coverage, but are unused.
78    if cx.codegen_unit.is_code_coverage_dead_code_cgu() {
79        let mut unused_instances = gather_unused_function_instances(cx);
80        // Sort the unused instances by symbol name, for the same reason as the used ones.
81        unused_instances.sort_by_cached_key(|&instance| tcx.symbol_name(instance).name);
82        covfun_records.extend(unused_instances.into_iter().filter_map(|instance| {
83            prepare_covfun_record(tcx, &mut global_file_table, instance, false)
84        }));
85    }
86
87    // If there are no covfun records for this CGU, don't generate a covmap record.
88    // Emitting a covmap record without any covfun records causes `llvm-cov` to
89    // fail when generating coverage reports, and if there are no covfun records
90    // then the covmap record isn't useful anyway.
91    // This should prevent a repeat of <https://github.com/rust-lang/rust/issues/133606>.
92    if covfun_records.is_empty() {
93        return;
94    }
95
96    // Encode all filenames referenced by coverage mappings in this CGU.
97    let filenames_buffer = global_file_table.make_filenames_buffer(tcx);
98    // The `llvm-cov` tool uses this hash to associate each covfun record with
99    // its corresponding filenames table, since the final binary will typically
100    // contain multiple covmap records from different compilation units.
101    let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer);
102
103    let mut unused_function_names = vec![];
104
105    for covfun in &covfun_records {
106        unused_function_names.extend(covfun.mangled_function_name_if_unused());
107
108        covfun::generate_covfun_record(cx, filenames_hash, covfun)
109    }
110
111    // For unused functions, we need to take their mangled names and store them
112    // in a specially-named global array. LLVM's `InstrProfiling` pass will
113    // detect this global and include those names in its `__llvm_prf_names`
114    // section. (See `llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp`.)
115    if !unused_function_names.is_empty() {
116        assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
117
118        let name_globals = unused_function_names
119            .into_iter()
120            .map(|mangled_function_name| cx.const_str(mangled_function_name).0)
121            .collect::<Vec<_>>();
122        let initializer = cx.const_array(cx.type_ptr(), &name_globals);
123
124        let array = llvm::add_global(cx.llmod, cx.val_ty(initializer), c"__llvm_coverage_names");
125        llvm::set_global_constant(array, true);
126        llvm::set_linkage(array, llvm::Linkage::InternalLinkage);
127        llvm::set_initializer(array, initializer);
128    }
129
130    // Generate the coverage map header, which contains the filenames used by
131    // this CGU's coverage mappings, and store it in a well-known global.
132    // (This is skipped if we returned early due to having no covfun records.)
133    generate_covmap_record(cx, covmap_version, &filenames_buffer);
134}
135
136/// Maps "global" (per-CGU) file ID numbers to their underlying source files.
137struct GlobalFileTable {
138    /// This "raw" table doesn't include the working dir, so a file's
139    /// global ID is its index in this set **plus one**.
140    raw_file_table: FxIndexMap<StableSourceFileId, Arc<SourceFile>>,
141}
142
143impl GlobalFileTable {
144    fn new() -> Self {
145        Self { raw_file_table: FxIndexMap::default() }
146    }
147
148    fn global_file_id_for_file(&mut self, file: &Arc<SourceFile>) -> GlobalFileId {
149        // Ensure the given file has a table entry, and get its index.
150        let entry = self.raw_file_table.entry(file.stable_id);
151        let raw_id = entry.index();
152        entry.or_insert_with(|| Arc::clone(file));
153
154        // The raw file table doesn't include an entry for the working dir
155        // (which has ID 0), so add 1 to get the correct ID.
156        GlobalFileId::from_usize(raw_id + 1)
157    }
158
159    fn make_filenames_buffer(&self, tcx: TyCtxt<'_>) -> Vec<u8> {
160        let mut table = Vec::with_capacity(self.raw_file_table.len() + 1);
161
162        // LLVM Coverage Mapping Format version 6 (zero-based encoded as 5)
163        // requires setting the first filename to the compilation directory.
164        // Since rustc generates coverage maps with relative paths, the
165        // compilation directory can be combined with the relative paths
166        // to get absolute paths, if needed.
167        table.push(
168            tcx.sess
169                .opts
170                .working_dir
171                .for_scope(tcx.sess, RemapPathScopeComponents::MACRO)
172                .to_string_lossy(),
173        );
174
175        // Add the regular entries after the base directory.
176        table.extend(self.raw_file_table.values().map(|file| {
177            file.name.for_scope(tcx.sess, RemapPathScopeComponents::MACRO).to_string_lossy()
178        }));
179
180        llvm_cov::write_filenames_to_buffer(&table)
181    }
182}
183
rustc_index::newtype_index! {
    /// An index into the CGU's overall list of file paths. The underlying paths
    /// will be embedded in the `__llvm_covmap` linker section.
    ///
    /// ID 0 refers to the working directory, so the IDs that
    /// `GlobalFileTable` hands out for actual source files start at 1.
    struct GlobalFileId {}
}
rustc_index::newtype_index! {
    /// An index into a function's list of global file IDs. That underlying list
    /// of local-to-global mappings will be embedded in the function's record in
    /// the `__llvm_covfun` linker section.
    ///
    /// A new local ID is allocated the first time
    /// `VirtualFileMapping::local_id_for_global` sees a given global file ID.
    struct LocalFileId {}
}
195
196/// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU)
197/// file IDs.
198#[derive(Debug, Default)]
199struct VirtualFileMapping {
200    local_to_global: IndexVec<LocalFileId, GlobalFileId>,
201    global_to_local: FxIndexMap<GlobalFileId, LocalFileId>,
202}
203
204impl VirtualFileMapping {
205    fn local_id_for_global(&mut self, global_file_id: GlobalFileId) -> LocalFileId {
206        *self
207            .global_to_local
208            .entry(global_file_id)
209            .or_insert_with(|| self.local_to_global.push(global_file_id))
210    }
211
212    fn to_vec(&self) -> Vec<u32> {
213        // This clone could be avoided by transmuting `&[GlobalFileId]` to `&[u32]`,
214        // but it isn't hot or expensive enough to justify the extra unsafety.
215        self.local_to_global.iter().map(|&global| GlobalFileId::as_u32(global)).collect()
216    }
217}
218
219/// Generates the contents of the covmap record for this CGU, which mostly
220/// consists of a header and a list of filenames. The record is then stored
221/// as a global variable in the `__llvm_covmap` section.
222fn generate_covmap_record<'ll>(cx: &CodegenCx<'ll, '_>, version: u32, filenames_buffer: &[u8]) {
223    // A covmap record consists of four target-endian u32 values, followed by
224    // the encoded filenames table. Two of the header fields are unused in
225    // modern versions of the LLVM coverage mapping format, and are always 0.
226    // <https://llvm.org/docs/CoverageMappingFormat.html#llvm-ir-representation>
227    // See also `src/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp`.
228    let covmap_header = cx.const_struct(
229        &[
230            cx.const_u32(0), // (unused)
231            cx.const_u32(filenames_buffer.len() as u32),
232            cx.const_u32(0), // (unused)
233            cx.const_u32(version),
234        ],
235        /* packed */ false,
236    );
237    let covmap_record = cx
238        .const_struct(&[covmap_header, cx.const_bytes(filenames_buffer)], /* packed */ false);
239
240    let covmap_global =
241        llvm::add_global(cx.llmod, cx.val_ty(covmap_record), &llvm_cov::covmap_var_name());
242    llvm::set_initializer(covmap_global, covmap_record);
243    llvm::set_global_constant(covmap_global, true);
244    llvm::set_linkage(covmap_global, llvm::Linkage::PrivateLinkage);
245    llvm::set_section(covmap_global, &llvm_cov::covmap_section_name(cx.llmod));
246    // LLVM's coverage mapping format specifies 8-byte alignment for items in this section.
247    // <https://llvm.org/docs/CoverageMappingFormat.html>
248    llvm::set_alignment(covmap_global, Align::EIGHT);
249
250    cx.add_used_global(covmap_global);
251}
252
253/// Each CGU will normally only emit coverage metadata for the functions that it actually generates.
254/// But since we don't want unused functions to disappear from coverage reports, we also scan for
255/// functions that were instrumented but are not participating in codegen.
256///
257/// These unused functions don't need to be codegenned, but we do need to add them to the function
258/// coverage map (in a single designated CGU) so that we still emit coverage mappings for them.
259/// We also end up adding their symbol names to a special global array that LLVM will include in
260/// its embedded coverage data.
261fn gather_unused_function_instances<'tcx>(cx: &CodegenCx<'_, 'tcx>) -> Vec<ty::Instance<'tcx>> {
262    assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
263
264    let tcx = cx.tcx;
265    let usage = prepare_usage_sets(tcx);
266
267    let is_unused_fn = |def_id: LocalDefId| -> bool {
268        // Usage sets expect `DefId`, so convert from `LocalDefId`.
269        let d: DefId = LocalDefId::to_def_id(def_id);
270        // To be potentially eligible for "unused function" mappings, a definition must:
271        // - Be eligible for coverage instrumentation
272        // - Not participate directly in codegen (or have lost all its coverage statements)
273        // - Not have any coverage statements inlined into codegenned functions
274        tcx.is_eligible_for_coverage(def_id)
275            && (!usage.all_mono_items.contains(&d) || usage.missing_own_coverage.contains(&d))
276            && !usage.used_via_inlining.contains(&d)
277    };
278
279    // FIXME(#79651): Consider trying to filter out dummy instantiations of
280    // unused generic functions from library crates, because they can produce
281    // "unused instantiation" in coverage reports even when they are actually
282    // used by some downstream crate in the same binary.
283
284    tcx.mir_keys(())
285        .iter()
286        .copied()
287        .filter(|&def_id| is_unused_fn(def_id))
288        .map(|def_id| make_dummy_instance(tcx, def_id))
289        .collect::<Vec<_>>()
290}
291
/// Sets of definitions produced by `prepare_usage_sets`, consulted when
/// deciding whether a function is "unused" for coverage purposes.
struct UsageSets<'tcx> {
    /// The `all_mono_items` set obtained from `collect_and_partition_mono_items`.
    all_mono_items: &'tcx DefIdSet,
    /// Functions whose coverage statements were found inlined into other functions.
    used_via_inlining: FxHashSet<DefId>,
    /// Functions whose MIR carries function coverage info, but in which no
    /// non-inlined coverage statement was found.
    missing_own_coverage: FxHashSet<DefId>,
}
297
298/// Prepare sets of definitions that are relevant to deciding whether something
299/// is an "unused function" for coverage purposes.
300fn prepare_usage_sets<'tcx>(tcx: TyCtxt<'tcx>) -> UsageSets<'tcx> {
301    let MonoItemPartitions { all_mono_items, codegen_units, .. } =
302        tcx.collect_and_partition_mono_items(());
303
304    // Obtain a MIR body for each function participating in codegen, via an
305    // arbitrary instance.
306    let mut def_ids_seen = FxHashSet::default();
307    let def_and_mir_for_all_mono_fns = codegen_units
308        .iter()
309        .flat_map(|cgu| cgu.items().keys())
310        .filter_map(|item| match item {
311            mir::mono::MonoItem::Fn(instance) => Some(instance),
312            mir::mono::MonoItem::Static(_) | mir::mono::MonoItem::GlobalAsm(_) => None,
313        })
314        // We only need one arbitrary instance per definition.
315        .filter(move |instance| def_ids_seen.insert(instance.def_id()))
316        .map(|instance| {
317            // We don't care about the instance, just its underlying MIR.
318            let body = tcx.instance_mir(instance.def);
319            (instance.def_id(), body)
320        });
321
322    // Functions whose coverage statements were found inlined into other functions.
323    let mut used_via_inlining = FxHashSet::default();
324    // Functions that were instrumented, but had all of their coverage statements
325    // removed by later MIR transforms (e.g. UnreachablePropagation).
326    let mut missing_own_coverage = FxHashSet::default();
327
328    for (def_id, body) in def_and_mir_for_all_mono_fns {
329        let mut saw_own_coverage = false;
330
331        // Inspect every coverage statement in the function's MIR.
332        for stmt in body
333            .basic_blocks
334            .iter()
335            .flat_map(|block| &block.statements)
336            .filter(|stmt| matches!(stmt.kind, mir::StatementKind::Coverage(_)))
337        {
338            if let Some(inlined) = stmt.source_info.scope.inlined_instance(&body.source_scopes) {
339                // This coverage statement was inlined from another function.
340                used_via_inlining.insert(inlined.def_id());
341            } else {
342                // Non-inlined coverage statements belong to the enclosing function.
343                saw_own_coverage = true;
344            }
345        }
346
347        if !saw_own_coverage && body.function_coverage_info.is_some() {
348            missing_own_coverage.insert(def_id);
349        }
350    }
351
352    UsageSets { all_mono_items, used_via_inlining, missing_own_coverage }
353}
354
355fn make_dummy_instance<'tcx>(tcx: TyCtxt<'tcx>, local_def_id: LocalDefId) -> ty::Instance<'tcx> {
356    let def_id = local_def_id.to_def_id();
357
358    // Make a dummy instance that fills in all generics with placeholders.
359    ty::Instance::new(
360        def_id,
361        ty::GenericArgs::for_item(tcx, def_id, |param, _| {
362            if let ty::GenericParamDefKind::Lifetime = param.kind {
363                tcx.lifetimes.re_erased.into()
364            } else {
365                tcx.mk_param_from_def(param)
366            }
367        }),
368    )
369}