Skip to main content

rustdoc/html/
span_map.rs

1use std::path::{Path, PathBuf};
2
3use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
4use rustc_hir as hir;
5use rustc_hir::def::{DefKind, Res};
6use rustc_hir::def_id::{DefId, LOCAL_CRATE};
7use rustc_hir::intravisit::{self, Visitor, VisitorExt};
8use rustc_hir::{ExprKind, HirId, Item, ItemKind, Mod, Node, QPath};
9use rustc_middle::hir::nested_filter;
10use rustc_middle::ty::{self, TyCtxt};
11use rustc_span::{BytePos, ExpnKind};
12
13use crate::clean::{self, PrimitiveType, rustc_span};
14use crate::html::sources;
15
/// This is a stripped down version of [`rustc_span::Span`] that only contains the start and end byte positions of the span.
///
/// Profiling showed that the `Span` interner was taking up a lot of the run-time when highlighting, and since we
/// never actually use the context and parent that are stored in a normal `Span`, we can replace its usages with this
/// one, which is much cheaper to construct.
///
/// The type is `Copy` and `Hash`, which lets it be used directly as a key of the span map.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) struct Span {
    /// Start byte position (taken from `rustc_span::Span::lo()` when converting).
    lo: BytePos,
    /// End byte position (taken from `rustc_span::Span::hi()` when converting).
    hi: BytePos,
}
26
27impl From<rustc_span::Span> for Span {
28    fn from(value: rustc_span::Span) -> Self {
29        Self { lo: value.lo(), hi: value.hi() }
30    }
31}
32
33impl Span {
34    pub(crate) fn lo(self) -> BytePos {
35        self.lo
36    }
37
38    pub(crate) fn hi(self) -> BytePos {
39        self.hi
40    }
41
42    pub(crate) fn with_lo(self, lo: BytePos) -> Self {
43        Self { lo, hi: self.hi() }
44    }
45
46    pub(crate) fn with_hi(self, hi: BytePos) -> Self {
47        Self { lo: self.lo(), hi }
48    }
49}
50
/// Placeholder [`Span`] whose start and end are both byte position `0`.
pub(crate) const DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0) };
52
/// The target a span of the rendered source code should link to.
///
/// In case the `span` definition comes from the same crate, we can simply get the `span` and use
/// it as is.
///
/// Otherwise, we store the definition `DefId` and will generate a link to the documentation page
/// instead of the source code directly.
///
/// Primitive types and links from an item's own definition to its documentation page have their
/// own dedicated variants.
#[derive(Debug)]
pub(crate) enum LinkFromSrc {
    /// The definition is available in the current crate: link to the given source span. Used for
    /// local items, local variables, locally defined macros and out-of-line module files.
    Local(clean::Span),
    /// The definition (item or macro) is not local to the current crate; only its `DefId` is kept.
    External(DefId),
    /// The path resolved to a primitive type.
    Primitive(PrimitiveType),
    /// Recorded on the name of an item at its definition site, to go to the documentation page
    /// of that item.
    Doc(DefId),
}
67
68/// This function will do at most two things:
69///
70/// 1. Generate a `span` correspondence map which links an item `span` to its definition `span`.
71/// 2. Collect the source code files.
72///
73/// It returns the source code files and the `span` correspondence map.
74///
75/// Note about the `span` correspondence map: the keys are actually `(lo, hi)` of `span`s. We don't
76/// need the `span` context later on, only their position, so instead of keeping a whole `Span`, we
77/// only keep the `lo` and `hi`.
78pub(crate) fn collect_spans_and_sources(
79    tcx: TyCtxt<'_>,
80    krate: &clean::Crate,
81    src_root: &Path,
82    include_sources: bool,
83    generate_link_to_definition: bool,
84) -> (FxIndexMap<PathBuf, String>, FxHashMap<Span, LinkFromSrc>) {
85    if include_sources {
86        let mut visitor =
87            SpanMapVisitor { tcx, maybe_typeck_results: None, matches: FxHashMap::default() };
88
89        if generate_link_to_definition {
90            tcx.hir_walk_toplevel_module(&mut visitor);
91        }
92        let sources = sources::collect_local_sources(tcx, src_root, krate);
93        (sources, visitor.matches)
94    } else {
95        (Default::default(), Default::default())
96    }
97}
98
/// HIR visitor that fills the span map (`matches`) while walking the crate.
struct SpanMapVisitor<'tcx> {
    /// Type context used to query HIR nodes, spans and typeck results.
    pub(crate) tcx: TyCtxt<'tcx>,
    /// Lazily computed typeck results of the body currently being visited. It is `None` when we
    /// are not inside a body (it is taken away when crossing an item boundary) and while
    /// visiting anonymous constants.
    pub(crate) maybe_typeck_results: Option<LazyTypeckResults<'tcx>>,
    /// The span map being built: span of a usage (or of an item name) → where it should link to.
    pub(crate) matches: FxHashMap<Span, LinkFromSrc>,
}
104
105impl<'tcx> SpanMapVisitor<'tcx> {
106    /// Returns the typeck results of the current body if we're in one.
107    ///
108    /// This will typeck the body if it hasn't been already. Since rustdoc intentionally doesn't run
109    /// all semantic analysis passes on function bodies at the time of writing, this can lead to us
110    /// "suddenly" rejecting the user's code under `--generate-link-to-definition` while accepting
111    /// it if that flag isn't passed! So use this method sparingly and think about the consequences
112    /// including performance!
113    ///
114    /// This behavior is documented in the rustdoc book. Ideally, it wouldn't be that way but no
115    /// good solution has been found so far. Don't think about adding some sort of flag to rustc to
116    /// suppress diagnostic emission that would be unsound wrt. `ErrorGuaranteed`[^1] and generally
117    /// be quite hacky!
118    ///
119    /// [^1]: Historical context:
120    /// <https://github.com/rust-lang/rust/issues/69426#issuecomment-1019412352>.
121    fn maybe_typeck_results(&mut self) -> Option<&'tcx ty::TypeckResults<'tcx>> {
122        let results = self.maybe_typeck_results.as_mut()?;
123        let results = results.cache.get_or_insert_with(|| self.tcx.typeck_body(results.body_id));
124        Some(results)
125    }
126
127    fn link_for_def(&self, def_id: DefId) -> LinkFromSrc {
128        if def_id.is_local() {
129            LinkFromSrc::Local(rustc_span(def_id, self.tcx))
130        } else {
131            LinkFromSrc::External(def_id)
132        }
133    }
134
135    /// This function is where we handle `hir::Path` elements and add them into the "span map".
136    fn handle_path(&mut self, path: &hir::Path<'_>, only_use_last_segment: bool) {
137        match path.res {
138            // FIXME: Properly support type parameters. Note they resolve just fine. The issue is
139            // that our highlighter would then also linkify their *definition site* for some reason
140            // linking them to themselves. Const parameters don't exhibit this issue.
141            Res::Def(DefKind::TyParam, _) => {}
142            Res::Def(_, def_id) => {
143                // The segments can be empty for `use *;` in a non-crate-root scope in Rust 2015.
144                let span = path.segments.last().map_or(path.span, |seg| seg.ident.span);
145                // In case the path ends with generics, we remove them from the span.
146                let span = if only_use_last_segment {
147                    if path.span.from_expansion() {
148                        // For now we don't handle span from macro expansions so nothing to do here.
149                        return;
150                    }
151                    span
152                } else {
153                    // In `use` statements, the included item is not in the path segments. However,
154                    // it doesn't matter because you can't have generics on `use` statements.
155                    if path.span.contains(span) { path.span.with_hi(span.hi()) } else { path.span }
156                };
157                self.matches.insert(span.into(), self.link_for_def(def_id));
158            }
159            Res::Local(_) if let Some(span) = self.tcx.hir_res_span(path.res) => {
160                let path_span = if only_use_last_segment {
161                    path.segments.last().unwrap().ident.span
162                } else {
163                    path.span
164                };
165                self.matches.insert(path_span.into(), LinkFromSrc::Local(clean::Span::new(span)));
166            }
167            Res::PrimTy(p) => {
168                // FIXME: Doesn't handle "path-like" primitives like arrays or tuples.
169                self.matches
170                    .insert(path.span.into(), LinkFromSrc::Primitive(PrimitiveType::from(p)));
171            }
172            _ => {}
173        }
174    }
175
176    /// Used to generate links on items' definition to go to their documentation page.
177    pub(crate) fn extract_info_from_hir_id(&mut self, hir_id: HirId) {
178        if let Node::Item(item) = self.tcx.hir_node(hir_id)
179            && let Some(span) = self.tcx.def_ident_span(item.owner_id)
180        {
181            let cspan = clean::Span::new(span);
182            // If the span isn't from the current crate, we ignore it.
183            if cspan.inner().is_dummy() || cspan.cnum(self.tcx.sess) != LOCAL_CRATE {
184                return;
185            }
186            self.matches.insert(span.into(), LinkFromSrc::Doc(item.owner_id.to_def_id()));
187        }
188    }
189
190    /// Adds the macro call into the span map. Returns `true` if the `span` was inside a macro
191    /// expansion, whether or not it was added to the span map.
192    ///
193    /// The idea for the macro support is to check if the current `Span` comes from expansion. If
194    /// so, we loop until we find the macro definition by using `outer_expn_data` in a loop.
195    /// Finally, we get the information about the macro itself (`span` if "local", `DefId`
196    /// otherwise) and store it inside the span map.
197    fn handle_macro(&mut self, span: rustc_span::Span) -> bool {
198        if !span.from_expansion() {
199            return false;
200        }
201        // So if the `span` comes from a macro expansion, we need to get the original
202        // macro's `DefId`.
203        let mut data = span.ctxt().outer_expn_data();
204        let mut call_site = data.call_site;
205        // Macros can expand to code containing macros, which will in turn be expanded, etc.
206        // So the idea here is to "go up" until we're back to code that was generated from
207        // macro expansion so that we can get the `DefId` of the original macro that was at the
208        // origin of this expansion.
209        while call_site.from_expansion() {
210            data = call_site.ctxt().outer_expn_data();
211            call_site = data.call_site;
212        }
213
214        let macro_name = match data.kind {
215            ExpnKind::Macro(_, macro_name) => macro_name,
216            // Even though we don't handle this kind of macro, this `data` still comes from
217            // expansion so we return `true` so we don't go any deeper in this code.
218            _ => return true,
219        };
220        let link_from_src = match data.macro_def_id {
221            Some(macro_def_id) => {
222                if macro_def_id.is_local() {
223                    LinkFromSrc::Local(clean::Span::new(data.def_site))
224                } else {
225                    LinkFromSrc::External(macro_def_id)
226                }
227            }
228            None => return true,
229        };
230        let new_span = data.call_site;
231        let macro_name = macro_name.as_str();
232        // The "call_site" includes the whole macro with its "arguments". We only want
233        // the macro name.
234        let new_span = new_span.with_hi(new_span.lo() + BytePos(macro_name.len() as u32));
235        self.matches.insert(new_span.into(), link_from_src);
236        true
237    }
238}
239
240impl<'tcx> Visitor<'tcx> for SpanMapVisitor<'tcx> {
    // NOTE(review): presumably `All` makes the walk descend into nested items and bodies, which
    // the `visit_nested_body` and `visit_item` overrides below rely on — confirm against
    // `rustc_middle::hir::nested_filter`.
    type NestedFilter = nested_filter::All;
242
    /// Hands the visitor's `TyCtxt` to the HIR walker.
    fn maybe_tcx(&mut self) -> Self::MaybeTyCtxt {
        self.tcx
    }
246
247    fn visit_nested_body(&mut self, body_id: hir::BodyId) -> Self::Result {
248        let maybe_typeck_results =
249            self.maybe_typeck_results.replace(LazyTypeckResults { body_id, cache: None });
250        self.visit_body(self.tcx.hir_body(body_id));
251        self.maybe_typeck_results = maybe_typeck_results;
252    }
253
254    fn visit_anon_const(&mut self, ct: &'tcx hir::AnonConst) {
255        // FIXME: Typeck'ing anon consts leads to ICEs in rustc if the parent body wasn't typeck'ed
256        //        yet. See #156418. Figure out what the best and proper solution for this is. Until
257        //        then, let's prevent `typeck` from being called on anon consts by not setting
258        //        `maybe_typeck_results` to `Some(_)`.
259        let maybe_typeck_results = self.maybe_typeck_results.take();
260        self.visit_body(self.tcx.hir_body(ct.body));
261        self.maybe_typeck_results = maybe_typeck_results;
262    }
263
264    fn visit_path(&mut self, path: &hir::Path<'tcx>, _id: HirId) {
265        if self.handle_macro(path.span) {
266            return;
267        }
268        self.handle_path(path, false);
269        intravisit::walk_path(self, path);
270    }
271
    /// Records qualified paths in the span map and then walks their components.
    ///
    /// * Type-relative paths are only linkified when typeck results are available for the
    ///   current body: the resolution is looked up there and handled through a synthetic
    ///   one-segment path.
    /// * Resolved paths are handled through `handle_path`, linking only their last segment.
    fn visit_qpath(&mut self, qpath: &QPath<'tcx>, id: HirId, _span: rustc_span::Span) {
        match *qpath {
            QPath::TypeRelative(qself, segment) => {
                // FIXME: This doesn't work for paths in *types* since HIR ty lowering currently
                //        doesn't write back the resolution of type-relative paths. Updating it to
                //        do so should be a simple fix.
                // FIXME: This obviously doesn't support item signatures / non-bodies. Sadly, rustc
                //        currently doesn't keep around that information & thus can't provide an API
                //        for it.
                //        `ItemCtxt`s would need a place to write back the resolution of type-
                //        dependent definitions. Ideally there was some sort of query keyed on the
                //        `LocalDefId` of the owning item that returns some table with which we can
                //        map the `HirId` to a `DefId`.
                //        Of course, we could re-HIR-ty-lower such paths *here* if we were to extend
                //        the public API of HIR analysis. However, I strongly advise against it as
                //        it would be too much of a hack.
                if let Some(typeck_results) = self.maybe_typeck_results() {
                    // Synthetic path made of the last segment only, carrying the resolution
                    // obtained from the typeck results.
                    let path = hir::Path {
                        // We change the span to not include parens.
                        span: segment.ident.span,
                        res: typeck_results.qpath_res(qpath, id),
                        segments: std::slice::from_ref(segment),
                    };
                    self.handle_path(&path, false);
                }

                // Keep walking: first the self type, then the segment.
                rustc_ast::visit::try_visit!(self.visit_ty_unambig(qself));
                self.visit_path_segment(segment);
            }
            QPath::Resolved(maybe_qself, path) => {
                // NOTE(review): this runs before the macro check below. `handle_path` bails out
                // on expansion spans only in its `Res::Def` arm, so local and primitive paths
                // coming from an expansion seem to still be recorded — confirm this is intended.
                self.handle_path(path, true);

                rustc_ast::visit::visit_opt!(self, visit_ty_unambig, maybe_qself);
                // Don't go deeper into paths generated by a macro expansion.
                if !self.handle_macro(path.span) {
                    intravisit::walk_path(self, path);
                }
            }
        }
    }
311
312    fn visit_mod(&mut self, m: &'tcx Mod<'tcx>, span: rustc_span::Span, id: HirId) {
313        // To make the difference between "mod foo {}" and "mod foo;". In case we "import" another
314        // file, we want to link to it. Otherwise no need to create a link.
315        if !span.overlaps(m.spans.inner_span) {
316            // Now that we confirmed it's a file import, we want to get the span for the module
317            // name only and not all the "mod foo;".
318            if let Node::Item(item) = self.tcx.hir_node(id) {
319                let (ident, _) = item.expect_mod();
320                self.matches.insert(
321                    ident.span.into(),
322                    LinkFromSrc::Local(clean::Span::new(m.spans.inner_span)),
323                );
324            }
325        } else {
326            // If it's a "mod foo {}", we want to look to its documentation page.
327            self.extract_info_from_hir_id(id);
328        }
329        intravisit::walk_mod(self, m);
330    }
331
332    fn visit_expr(&mut self, expr: &'tcx hir::Expr<'tcx>) {
333        match expr.kind {
334            ExprKind::MethodCall(segment, ..) => {
335                if let Some(typeck_results) = self.maybe_typeck_results()
336                    && let Some(def_id) = typeck_results.type_dependent_def_id(expr.hir_id)
337                {
338                    self.matches.insert(segment.ident.span.into(), self.link_for_def(def_id));
339                }
340            }
341            // We don't want to go deeper into the macro.
342            _ if self.handle_macro(expr.span) => return,
343            _ => {}
344        }
345        intravisit::walk_expr(self, expr);
346    }
347
348    fn visit_item(&mut self, item: &'tcx Item<'tcx>) {
349        // We're no longer in a body since we've crossed an item boundary.
350        // Temporarily take away the typeck results which are only valid in bodies.
351        let maybe_typeck_results = self.maybe_typeck_results.take();
352
353        match item.kind {
354            ItemKind::Static(..)
355            | ItemKind::Const(..)
356            | ItemKind::Fn { .. }
357            | ItemKind::Macro(..)
358            | ItemKind::TyAlias(..)
359            | ItemKind::Enum(..)
360            | ItemKind::Struct(..)
361            | ItemKind::Union(..)
362            | ItemKind::Trait { .. }
363            | ItemKind::TraitAlias(..) => self.extract_info_from_hir_id(item.hir_id()),
364            ItemKind::Impl(_)
365            | ItemKind::Use(..)
366            | ItemKind::ExternCrate(..)
367            | ItemKind::ForeignMod { .. }
368            | ItemKind::GlobalAsm { .. }
369            // We already have "visit_mod" above so no need to check it here.
370            | ItemKind::Mod(..) => {}
371        }
372
373        intravisit::walk_item(self, item);
374
375        self.maybe_typeck_results = maybe_typeck_results;
376    }
377}
378
/// Lazily computed & cached [`ty::TypeckResults`].
///
/// Created with an empty cache when a nested body is entered; the body is only type-checked
/// if the results are actually requested.
struct LazyTypeckResults<'tcx> {
    /// The body these results belong to.
    body_id: hir::BodyId,
    /// The typeck results of `body_id`, once they have been computed; `None` until then.
    cache: Option<&'tcx ty::TypeckResults<'tcx>>,
}