rustc_passes/
reachable.rs

//! Finds local items that are "reachable", which means that other crates need access to their
//! compiled code or their *runtime* MIR. (Compile-time MIR is always encoded anyway, so we don't
//! worry about that here.)
//!
//! An item is "reachable" if codegen that happens in downstream crates can end up referencing this
//! item. This obviously includes all public items. However, some of these items cannot be codegen'd
//! (because they are generic), and for some the compiled code is not sufficient (because we want to
//! cross-crate inline them). These items "need cross-crate MIR". When a reachable function `f`
//! needs cross-crate MIR, then its MIR may be codegen'd in a downstream crate, and hence items it
//! mentions need to be considered reachable.
//!
//! Furthermore, if a `const`/`const fn` is reachable, then it can return pointers to other items,
//! making those reachable as well. For instance, consider a `const fn` returning a pointer to an
//! otherwise entirely private function: if a downstream crate calls that `const fn` to compute the
//! initial value of a `static`, then it needs to generate a direct reference to this function --
//! i.e., the function is directly reachable from that downstream crate! Hence we have to recurse
//! into `const` and `const fn`.
//!
//! Conversely, reachability *stops* when it hits a monomorphic non-`const` function that we do not
//! want to cross-crate inline. That function will just be codegen'd in this crate, which means the
//! monomorphization collector will consider it a root and then do another graph traversal to
//! codegen everything called by this function -- but that's a very different graph from what we are
//! considering here as at that point, everything is monomorphic.
24
25use hir::def_id::LocalDefIdSet;
26use rustc_data_structures::stack::ensure_sufficient_stack;
27use rustc_hir as hir;
28use rustc_hir::Node;
29use rustc_hir::def::{DefKind, Res};
30use rustc_hir::def_id::{DefId, LocalDefId};
31use rustc_hir::intravisit::{self, Visitor};
32use rustc_middle::bug;
33use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, CodegenFnAttrs};
34use rustc_middle::middle::privacy::{self, Level};
35use rustc_middle::mir::interpret::{ConstAllocation, ErrorHandled, GlobalAlloc};
36use rustc_middle::query::Providers;
37use rustc_middle::ty::{self, ExistentialTraitRef, TyCtxt};
38use rustc_privacy::DefIdVisitor;
39use rustc_session::config::CrateType;
40use tracing::debug;
41
42/// Determines whether this item is recursive for reachability. See `is_recursively_reachable_local`
43/// below for details.
44fn recursively_reachable(tcx: TyCtxt<'_>, def_id: DefId) -> bool {
45    tcx.generics_of(def_id).requires_monomorphization(tcx)
46        || tcx.cross_crate_inlinable(def_id)
47        || tcx.is_const_fn(def_id)
48}
49
50// Information needed while computing reachability.
51struct ReachableContext<'tcx> {
52    // The type context.
53    tcx: TyCtxt<'tcx>,
54    maybe_typeck_results: Option<&'tcx ty::TypeckResults<'tcx>>,
55    // The set of items which must be exported in the linkage sense.
56    reachable_symbols: LocalDefIdSet,
57    // A worklist of item IDs. Each item ID in this worklist will be inlined
58    // and will be scanned for further references.
59    // FIXME(eddyb) benchmark if this would be faster as a `VecDeque`.
60    worklist: Vec<LocalDefId>,
61    // Whether any output of this compilation is a library
62    any_library: bool,
63}
64
65impl<'tcx> Visitor<'tcx> for ReachableContext<'tcx> {
66    fn visit_nested_body(&mut self, body: hir::BodyId) {
67        let old_maybe_typeck_results =
68            self.maybe_typeck_results.replace(self.tcx.typeck_body(body));
69        let body = self.tcx.hir_body(body);
70        self.visit_body(body);
71        self.maybe_typeck_results = old_maybe_typeck_results;
72    }
73
74    fn visit_expr(&mut self, expr: &'tcx hir::Expr<'tcx>) {
75        let res = match expr.kind {
76            hir::ExprKind::Path(ref qpath) => {
77                // This covers fn ptr casts but also "non-method" calls.
78                Some(self.typeck_results().qpath_res(qpath, expr.hir_id))
79            }
80            hir::ExprKind::MethodCall(..) => {
81                // Method calls don't involve a full "path", so we need to determine the callee
82                // based on the receiver type.
83                // If this is a method call on a generic type, we might not be able to find the
84                // callee. That's why `reachable_set` also adds all potential callees for such
85                // calls, i.e. all trait impl items, to the reachable set. So here we only worry
86                // about the calls we can identify.
87                self.typeck_results()
88                    .type_dependent_def(expr.hir_id)
89                    .map(|(kind, def_id)| Res::Def(kind, def_id))
90            }
91            hir::ExprKind::Closure(&hir::Closure { def_id, .. }) => {
92                self.reachable_symbols.insert(def_id);
93                None
94            }
95            _ => None,
96        };
97
98        if let Some(res) = res {
99            self.propagate_item(res);
100        }
101
102        intravisit::walk_expr(self, expr)
103    }
104
105    fn visit_inline_asm(&mut self, asm: &'tcx hir::InlineAsm<'tcx>, id: hir::HirId) {
106        for (op, _) in asm.operands {
107            if let hir::InlineAsmOperand::SymStatic { def_id, .. } = op {
108                if let Some(def_id) = def_id.as_local() {
109                    self.reachable_symbols.insert(def_id);
110                }
111            }
112        }
113        intravisit::walk_inline_asm(self, asm, id);
114    }
115}
116
117impl<'tcx> ReachableContext<'tcx> {
118    /// Gets the type-checking results for the current body.
119    /// As this will ICE if called outside bodies, only call when working with
120    /// `Expr` or `Pat` nodes (they are guaranteed to be found only in bodies).
121    #[track_caller]
122    fn typeck_results(&self) -> &'tcx ty::TypeckResults<'tcx> {
123        self.maybe_typeck_results
124            .expect("`ReachableContext::typeck_results` called outside of body")
125    }
126
127    /// Returns true if the given def ID represents a local item that is recursive for reachability,
128    /// i.e. whether everything mentioned in here also needs to be considered reachable.
129    ///
130    /// There are two reasons why an item may be recursively reachable:
131    /// - It needs cross-crate MIR (see the module-level doc comment above).
132    /// - It is a `const` or `const fn`. This is *not* because we need the MIR to interpret them
133    ///   (MIR for const-eval and MIR for codegen is separate, and MIR for const-eval is always
134    ///   encoded). Instead, it is because `const fn` can create `fn()` pointers to other items
135    ///   which end up in the evaluated result of the constant and can then be called from other
136    ///   crates. Those items must be considered reachable.
137    fn is_recursively_reachable_local(&self, def_id: DefId) -> bool {
138        let Some(def_id) = def_id.as_local() else {
139            return false;
140        };
141
142        match self.tcx.hir_node_by_def_id(def_id) {
143            Node::Item(item) => match item.kind {
144                hir::ItemKind::Fn { .. } => recursively_reachable(self.tcx, def_id.into()),
145                _ => false,
146            },
147            Node::TraitItem(trait_method) => match trait_method.kind {
148                hir::TraitItemKind::Const(_, ref default) => default.is_some(),
149                hir::TraitItemKind::Fn(_, hir::TraitFn::Provided(_)) => true,
150                hir::TraitItemKind::Fn(_, hir::TraitFn::Required(_))
151                | hir::TraitItemKind::Type(..) => false,
152            },
153            Node::ImplItem(impl_item) => match impl_item.kind {
154                hir::ImplItemKind::Const(..) => true,
155                hir::ImplItemKind::Fn(..) => {
156                    recursively_reachable(self.tcx, impl_item.hir_id().owner.to_def_id())
157                }
158                hir::ImplItemKind::Type(_) => false,
159            },
160            Node::Expr(&hir::Expr { kind: hir::ExprKind::Closure(..), .. }) => true,
161            _ => false,
162        }
163    }
164
165    // Step 2: Mark all symbols that the symbols on the worklist touch.
166    fn propagate(&mut self) {
167        let mut scanned = LocalDefIdSet::default();
168        while let Some(search_item) = self.worklist.pop() {
169            if !scanned.insert(search_item) {
170                continue;
171            }
172
173            self.propagate_node(&self.tcx.hir_node_by_def_id(search_item), search_item);
174        }
175    }
176
177    fn propagate_node(&mut self, node: &Node<'tcx>, search_item: LocalDefId) {
178        if !self.any_library {
179            // If we are building an executable, only explicitly extern
180            // types need to be exported.
181            let codegen_attrs = if self.tcx.def_kind(search_item).has_codegen_attrs() {
182                self.tcx.codegen_fn_attrs(search_item)
183            } else {
184                CodegenFnAttrs::EMPTY
185            };
186            let is_extern = codegen_attrs.contains_extern_indicator();
187            let std_internal =
188                codegen_attrs.flags.contains(CodegenFnAttrFlags::RUSTC_STD_INTERNAL_SYMBOL);
189            if is_extern || std_internal {
190                self.reachable_symbols.insert(search_item);
191            }
192        } else {
193            // If we are building a library, then reachable symbols will
194            // continue to participate in linkage after this product is
195            // produced. In this case, we traverse the ast node, recursing on
196            // all reachable nodes from this one.
197            self.reachable_symbols.insert(search_item);
198        }
199
200        match *node {
201            Node::Item(item) => {
202                match item.kind {
203                    hir::ItemKind::Fn { body, .. } => {
204                        if recursively_reachable(self.tcx, item.owner_id.into()) {
205                            self.visit_nested_body(body);
206                        }
207                    }
208
209                    hir::ItemKind::Const(_, _, init) => {
210                        // Only things actually ending up in the final constant value are reachable
211                        // for codegen. Everything else is only needed during const-eval, so even if
212                        // const-eval happens in a downstream crate, all they need is
213                        // `mir_for_ctfe`.
214                        match self.tcx.const_eval_poly_to_alloc(item.owner_id.def_id.into()) {
215                            Ok(alloc) => {
216                                let alloc = self.tcx.global_alloc(alloc.alloc_id).unwrap_memory();
217                                self.propagate_from_alloc(alloc);
218                            }
219                            // We can't figure out which value the constant will evaluate to. In
220                            // lieu of that, we have to consider everything mentioned in the const
221                            // initializer reachable, since it *may* end up in the final value.
222                            Err(ErrorHandled::TooGeneric(_)) => self.visit_nested_body(init),
223                            // If there was an error evaluating the const, nothing can be reachable
224                            // via it, and anyway compilation will fail.
225                            Err(ErrorHandled::Reported(..)) => {}
226                        }
227                    }
228                    hir::ItemKind::Static(..) => {
229                        if let Ok(alloc) = self.tcx.eval_static_initializer(item.owner_id.def_id) {
230                            self.propagate_from_alloc(alloc);
231                        }
232                    }
233
234                    // These are normal, nothing reachable about these
235                    // inherently and their children are already in the
236                    // worklist, as determined by the privacy pass
237                    hir::ItemKind::ExternCrate(_)
238                    | hir::ItemKind::Use(..)
239                    | hir::ItemKind::TyAlias(..)
240                    | hir::ItemKind::Macro(..)
241                    | hir::ItemKind::Mod(..)
242                    | hir::ItemKind::ForeignMod { .. }
243                    | hir::ItemKind::Impl { .. }
244                    | hir::ItemKind::Trait(..)
245                    | hir::ItemKind::TraitAlias(..)
246                    | hir::ItemKind::Struct(..)
247                    | hir::ItemKind::Enum(..)
248                    | hir::ItemKind::Union(..)
249                    | hir::ItemKind::GlobalAsm { .. } => {}
250                }
251            }
252            Node::TraitItem(trait_method) => {
253                match trait_method.kind {
254                    hir::TraitItemKind::Const(_, None)
255                    | hir::TraitItemKind::Fn(_, hir::TraitFn::Required(_)) => {
256                        // Keep going, nothing to get exported
257                    }
258                    hir::TraitItemKind::Const(_, Some(body_id))
259                    | hir::TraitItemKind::Fn(_, hir::TraitFn::Provided(body_id)) => {
260                        self.visit_nested_body(body_id);
261                    }
262                    hir::TraitItemKind::Type(..) => {}
263                }
264            }
265            Node::ImplItem(impl_item) => match impl_item.kind {
266                hir::ImplItemKind::Const(_, body) => {
267                    self.visit_nested_body(body);
268                }
269                hir::ImplItemKind::Fn(_, body) => {
270                    if recursively_reachable(self.tcx, impl_item.hir_id().owner.to_def_id()) {
271                        self.visit_nested_body(body)
272                    }
273                }
274                hir::ImplItemKind::Type(_) => {}
275            },
276            Node::Expr(&hir::Expr {
277                kind: hir::ExprKind::Closure(&hir::Closure { body, .. }),
278                ..
279            }) => {
280                self.visit_nested_body(body);
281            }
282            // Nothing to recurse on for these
283            Node::ForeignItem(_)
284            | Node::Variant(_)
285            | Node::Ctor(..)
286            | Node::Field(_)
287            | Node::Ty(_)
288            | Node::Crate(_)
289            | Node::Synthetic
290            | Node::OpaqueTy(..) => {}
291            _ => {
292                bug!(
293                    "found unexpected node kind in worklist: {} ({:?})",
294                    self.tcx.hir_id_to_string(self.tcx.local_def_id_to_hir_id(search_item)),
295                    node,
296                );
297            }
298        }
299    }
300
301    /// Finds things to add to `reachable_symbols` within allocations.
302    /// In contrast to visit_nested_body this ignores things that were only needed to evaluate
303    /// the allocation.
304    fn propagate_from_alloc(&mut self, alloc: ConstAllocation<'tcx>) {
305        if !self.any_library {
306            return;
307        }
308        for (_, prov) in alloc.0.provenance().ptrs().iter() {
309            match self.tcx.global_alloc(prov.alloc_id()) {
310                GlobalAlloc::Static(def_id) => {
311                    self.propagate_item(Res::Def(self.tcx.def_kind(def_id), def_id))
312                }
313                GlobalAlloc::Function { instance, .. } => {
314                    // Manually visit to actually see the instance's `DefId`. Type visitors won't see it
315                    self.propagate_item(Res::Def(
316                        self.tcx.def_kind(instance.def_id()),
317                        instance.def_id(),
318                    ));
319                    self.visit(instance.args);
320                }
321                GlobalAlloc::VTable(ty, dyn_ty) => {
322                    self.visit(ty);
323                    // Manually visit to actually see the trait's `DefId`. Type visitors won't see it
324                    if let Some(trait_ref) = dyn_ty.principal() {
325                        let ExistentialTraitRef { def_id, args, .. } = trait_ref.skip_binder();
326                        self.visit_def_id(def_id, "", &"");
327                        self.visit(args);
328                    }
329                }
330                GlobalAlloc::Memory(alloc) => self.propagate_from_alloc(alloc),
331            }
332        }
333    }
334
335    fn propagate_item(&mut self, res: Res) {
336        let Res::Def(kind, def_id) = res else { return };
337        let Some(def_id) = def_id.as_local() else { return };
338        match kind {
339            DefKind::Static { nested: true, .. } => {
340                // This is the main purpose of this function: add the def_id we find
341                // to `reachable_symbols`.
342                if self.reachable_symbols.insert(def_id) {
343                    if let Ok(alloc) = self.tcx.eval_static_initializer(def_id) {
344                        // This cannot cause infinite recursion, because we abort by inserting into the
345                        // work list once we hit a normal static. Nested statics, even if they somehow
346                        // become recursive, are also not infinitely recursing, because of the
347                        // `reachable_symbols` check above.
348                        // We still need to protect against stack overflow due to deeply nested statics.
349                        ensure_sufficient_stack(|| self.propagate_from_alloc(alloc));
350                    }
351                }
352            }
353            // Reachable constants and reachable statics can have their contents inlined
354            // into other crates. Mark them as reachable and recurse into their body.
355            DefKind::Const | DefKind::AssocConst | DefKind::Static { .. } => {
356                self.worklist.push(def_id);
357            }
358            _ => {
359                if self.is_recursively_reachable_local(def_id.to_def_id()) {
360                    self.worklist.push(def_id);
361                } else {
362                    self.reachable_symbols.insert(def_id);
363                }
364            }
365        }
366    }
367}
368
369impl<'tcx> DefIdVisitor<'tcx> for ReachableContext<'tcx> {
370    type Result = ();
371
372    fn tcx(&self) -> TyCtxt<'tcx> {
373        self.tcx
374    }
375
376    fn visit_def_id(
377        &mut self,
378        def_id: DefId,
379        _kind: &str,
380        _descr: &dyn std::fmt::Display,
381    ) -> Self::Result {
382        self.propagate_item(Res::Def(self.tcx.def_kind(def_id), def_id))
383    }
384}
385
386fn check_item<'tcx>(
387    tcx: TyCtxt<'tcx>,
388    id: hir::ItemId,
389    worklist: &mut Vec<LocalDefId>,
390    effective_visibilities: &privacy::EffectiveVisibilities,
391) {
392    if has_custom_linkage(tcx, id.owner_id.def_id) {
393        worklist.push(id.owner_id.def_id);
394    }
395
396    if !matches!(tcx.def_kind(id.owner_id), DefKind::Impl { of_trait: true }) {
397        return;
398    }
399
400    // We need only trait impls here, not inherent impls, and only non-exported ones
401    if effective_visibilities.is_reachable(id.owner_id.def_id) {
402        return;
403    }
404
405    let items = tcx.associated_item_def_ids(id.owner_id);
406    worklist.extend(items.iter().map(|ii_ref| ii_ref.expect_local()));
407
408    let Some(trait_def_id) = tcx.trait_id_of_impl(id.owner_id.to_def_id()) else {
409        unreachable!();
410    };
411
412    if !trait_def_id.is_local() {
413        return;
414    }
415
416    worklist
417        .extend(tcx.provided_trait_methods(trait_def_id).map(|assoc| assoc.def_id.expect_local()));
418}
419
420fn has_custom_linkage(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool {
421    // Anything which has custom linkage gets thrown on the worklist no
422    // matter where it is in the crate, along with "special std symbols"
423    // which are currently akin to allocator symbols.
424    if !tcx.def_kind(def_id).has_codegen_attrs() {
425        return false;
426    }
427    let codegen_attrs = tcx.codegen_fn_attrs(def_id);
428    codegen_attrs.contains_extern_indicator()
429        || codegen_attrs.flags.contains(CodegenFnAttrFlags::RUSTC_STD_INTERNAL_SYMBOL)
430        // FIXME(nbdd0121): `#[used]` are marked as reachable here so it's picked up by
431        // `linked_symbols` in cg_ssa. They won't be exported in binary or cdylib due to their
432        // `SymbolExportLevel::Rust` export level but may end up being exported in dylibs.
433        || codegen_attrs.flags.contains(CodegenFnAttrFlags::USED)
434        || codegen_attrs.flags.contains(CodegenFnAttrFlags::USED_LINKER)
435}
436
437/// See module-level doc comment above.
438fn reachable_set(tcx: TyCtxt<'_>, (): ()) -> LocalDefIdSet {
439    let effective_visibilities = &tcx.effective_visibilities(());
440
441    let any_library = tcx
442        .crate_types()
443        .iter()
444        .any(|ty| *ty == CrateType::Rlib || *ty == CrateType::Dylib || *ty == CrateType::ProcMacro);
445    let mut reachable_context = ReachableContext {
446        tcx,
447        maybe_typeck_results: None,
448        reachable_symbols: Default::default(),
449        worklist: Vec::new(),
450        any_library,
451    };
452
453    // Step 1: Seed the worklist with all nodes which were found to be public as
454    //         a result of the privacy pass along with all local lang items and impl items.
455    //         If other crates link to us, they're going to expect to be able to
456    //         use the lang items, so we need to be sure to mark them as
457    //         exported.
458    reachable_context.worklist = effective_visibilities
459        .iter()
460        .filter_map(|(&id, effective_vis)| {
461            effective_vis.is_public_at_level(Level::ReachableThroughImplTrait).then_some(id)
462        })
463        .collect::<Vec<_>>();
464
465    for (_, def_id) in tcx.lang_items().iter() {
466        if let Some(def_id) = def_id.as_local() {
467            reachable_context.worklist.push(def_id);
468        }
469    }
470    {
471        // As explained above, we have to mark all functions called from reachable
472        // `item_might_be_inlined` items as reachable. The issue is, when those functions are
473        // generic and call a trait method, we have no idea where that call goes! So, we
474        // conservatively mark all trait impl items as reachable.
475        // FIXME: One possible strategy for pruning the reachable set is to avoid marking impl
476        // items of non-exported traits (or maybe all local traits?) unless their respective
477        // trait items are used from inlinable code through method call syntax or UFCS, or their
478        // trait is a lang item.
479        // (But if you implement this, don't forget to take into account that vtables can also
480        // make trait methods reachable!)
481        let crate_items = tcx.hir_crate_items(());
482
483        for id in crate_items.free_items() {
484            check_item(tcx, id, &mut reachable_context.worklist, effective_visibilities);
485        }
486
487        for id in crate_items.impl_items() {
488            if has_custom_linkage(tcx, id.owner_id.def_id) {
489                reachable_context.worklist.push(id.owner_id.def_id);
490            }
491        }
492    }
493
494    // Step 2: Mark all symbols that the symbols on the worklist touch.
495    reachable_context.propagate();
496
497    debug!("Inline reachability shows: {:?}", reachable_context.reachable_symbols);
498
499    // Return the set of reachable symbols.
500    reachable_context.reachable_symbols
501}
502
503pub(crate) fn provide(providers: &mut Providers) {
504    *providers = Providers { reachable_set, ..*providers };
505}