Skip to main content

rustdoc/html/
span_map.rs

1use std::path::{Path, PathBuf};
2
3use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
4use rustc_hir as hir;
5use rustc_hir::def::{DefKind, Res};
6use rustc_hir::def_id::{DefId, LOCAL_CRATE};
7use rustc_hir::intravisit::{self, Visitor, VisitorExt};
8use rustc_hir::{ExprKind, HirId, Item, ItemKind, Mod, Node, QPath};
9use rustc_middle::hir::nested_filter;
10use rustc_middle::ty::{self, TyCtxt};
11use rustc_span::{BytePos, ExpnKind};
12
13use crate::clean::{self, PrimitiveType, rustc_span};
14use crate::html::sources;
15
/// This is a stripped down version of [`rustc_span::Span`] that only contains the start and end
/// byte positions of the span.
///
/// Profiling showed that the `Span` interner was taking up a lot of the run-time when highlighting, and since we
/// never actually use the context and parent that are stored in a normal `Span`, we can replace its usages with this
/// one, which is much cheaper to construct.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) struct Span {
    /// Start byte position of the span.
    lo: BytePos,
    /// End byte position of the span.
    hi: BytePos,
}
26
27impl From<rustc_span::Span> for Span {
28    fn from(value: rustc_span::Span) -> Self {
29        Self { lo: value.lo(), hi: value.hi() }
30    }
31}
32
33impl Span {
34    pub(crate) fn lo(self) -> BytePos {
35        self.lo
36    }
37
38    pub(crate) fn hi(self) -> BytePos {
39        self.hi
40    }
41
42    pub(crate) fn with_lo(self, lo: BytePos) -> Self {
43        Self { lo, hi: self.hi() }
44    }
45
46    pub(crate) fn with_hi(self, hi: BytePos) -> Self {
47        Self { lo: self.lo(), hi }
48    }
49}
50
/// A placeholder [`Span`] whose start and end are both byte position `0`.
pub(crate) const DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0) };
52
/// This enum describes what a piece of highlighted source code should link to.
///
/// In case the `span` definition comes from the same crate, we can simply get the `span` and use
/// it as is.
///
/// Otherwise, we store the definition `DefId` and will generate a link to the documentation page
/// instead of the source code directly.
#[derive(Debug)]
pub(crate) enum LinkFromSrc {
    /// The target is a span in the source code of the current crate.
    Local(clean::Span),
    /// The target is a definition that is not local to the current crate, identified by its
    /// `DefId`.
    External(DefId),
    /// The path resolved to a primitive type.
    Primitive(PrimitiveType),
    /// Used on the definition site of an item of the current crate to go to the item's
    /// documentation page.
    Doc(DefId),
}
67
68/// This function will do at most two things:
69///
70/// 1. Generate a `span` correspondence map which links an item `span` to its definition `span`.
71/// 2. Collect the source code files.
72///
73/// It returns the source code files and the `span` correspondence map.
74///
75/// Note about the `span` correspondence map: the keys are actually `(lo, hi)` of `span`s. We don't
76/// need the `span` context later on, only their position, so instead of keeping a whole `Span`, we
77/// only keep the `lo` and `hi`.
78pub(crate) fn collect_spans_and_sources(
79    tcx: TyCtxt<'_>,
80    krate: &clean::Crate,
81    src_root: &Path,
82    include_sources: bool,
83    generate_link_to_definition: bool,
84) -> (FxIndexMap<PathBuf, String>, FxHashMap<Span, LinkFromSrc>) {
85    if include_sources {
86        let mut visitor =
87            SpanMapVisitor { tcx, maybe_typeck_results: None, matches: FxHashMap::default() };
88
89        if generate_link_to_definition {
90            tcx.hir_walk_toplevel_module(&mut visitor);
91        }
92        let sources = sources::collect_local_sources(tcx, src_root, krate);
93        (sources, visitor.matches)
94    } else {
95        (Default::default(), Default::default())
96    }
97}
98
/// HIR visitor which fills the "span map" (`matches`) while walking the crate.
struct SpanMapVisitor<'tcx> {
    pub(crate) tcx: TyCtxt<'tcx>,
    /// Lazily computed typeck results of the body currently being visited, `None` when we are not
    /// inside of a body (or when typeck'ing is deliberately disabled).
    pub(crate) maybe_typeck_results: Option<LazyTypeckResults<'tcx>>,
    /// The span map: from the span of some piece of source code to what it should link to.
    pub(crate) matches: FxHashMap<Span, LinkFromSrc>,
}
104
105impl<'tcx> SpanMapVisitor<'tcx> {
106    /// Returns the typeck results of the current body if we're in one.
107    ///
108    /// This will typeck the body if it hasn't been already. Since rustdoc intentionally doesn't run
109    /// all semantic analysis passes on function bodies at the time of writing, this can lead to us
110    /// "suddenly" rejecting the user's code under `--generate-link-to-definition` while accepting
111    /// it if that flag isn't passed! So use this method sparingly and think about the consequences
112    /// including performance!
113    ///
114    /// This behavior is documented in the rustdoc book. Ideally, it wouldn't be that way but no
115    /// good solution has been found so far. Don't think about adding some sort of flag to rustc to
116    /// suppress diagnostic emission that would be unsound wrt. `ErrorGuaranteed`[^1] and generally
117    /// be quite hacky!
118    ///
119    /// [^1]: Historical context:
120    /// <https://github.com/rust-lang/rust/issues/69426#issuecomment-1019412352>.
121    fn maybe_typeck_results(&mut self) -> Option<&'tcx ty::TypeckResults<'tcx>> {
122        let results = self.maybe_typeck_results.as_mut()?;
123        let results = results.cache.get_or_insert_with(|| self.tcx.typeck_body(results.body_id));
124        Some(results)
125    }
126
127    fn link_for_def(&self, def_id: DefId) -> LinkFromSrc {
128        if def_id.is_local() {
129            LinkFromSrc::Local(rustc_span(def_id, self.tcx))
130        } else {
131            LinkFromSrc::External(def_id)
132        }
133    }
134
135    /// This function is where we handle `hir::Path` elements and add them into the "span map".
136    fn handle_path(&mut self, path: &hir::Path<'_>, only_use_last_segment: bool) {
137        match path.res {
138            // FIXME: Properly support type parameters. Note they resolve just fine. The issue is
139            // that our highlighter would then also linkify their *definition site* for some reason
140            // linking them to themselves. Const parameters don't exhibit this issue.
141            Res::Def(DefKind::TyParam, _) => {}
142            Res::Def(_, def_id) => {
143                // The segments can be empty for `use *;` in a non-crate-root scope in Rust 2015.
144                let span = path.segments.last().map_or(path.span, |seg| seg.ident.span);
145                // In case the path ends with generics, we remove them from the span.
146                let span = if only_use_last_segment {
147                    span
148                } else {
149                    // In `use` statements, the included item is not in the path segments. However,
150                    // it doesn't matter because you can't have generics on `use` statements.
151                    if path.span.contains(span) { path.span.with_hi(span.hi()) } else { path.span }
152                };
153                self.matches.insert(span.into(), self.link_for_def(def_id));
154            }
155            Res::Local(_) if let Some(span) = self.tcx.hir_res_span(path.res) => {
156                let path_span = if only_use_last_segment {
157                    path.segments.last().unwrap().ident.span
158                } else {
159                    path.span
160                };
161                self.matches.insert(path_span.into(), LinkFromSrc::Local(clean::Span::new(span)));
162            }
163            Res::PrimTy(p) => {
164                // FIXME: Doesn't handle "path-like" primitives like arrays or tuples.
165                self.matches
166                    .insert(path.span.into(), LinkFromSrc::Primitive(PrimitiveType::from(p)));
167            }
168            _ => {}
169        }
170    }
171
172    /// Used to generate links on items' definition to go to their documentation page.
173    pub(crate) fn extract_info_from_hir_id(&mut self, hir_id: HirId) {
174        if let Node::Item(item) = self.tcx.hir_node(hir_id)
175            && let Some(span) = self.tcx.def_ident_span(item.owner_id)
176        {
177            let cspan = clean::Span::new(span);
178            // If the span isn't from the current crate, we ignore it.
179            if cspan.inner().is_dummy() || cspan.cnum(self.tcx.sess) != LOCAL_CRATE {
180                return;
181            }
182            self.matches.insert(span.into(), LinkFromSrc::Doc(item.owner_id.to_def_id()));
183        }
184    }
185
186    /// Adds the macro call into the span map. Returns `true` if the `span` was inside a macro
187    /// expansion, whether or not it was added to the span map.
188    ///
189    /// The idea for the macro support is to check if the current `Span` comes from expansion. If
190    /// so, we loop until we find the macro definition by using `outer_expn_data` in a loop.
191    /// Finally, we get the information about the macro itself (`span` if "local", `DefId`
192    /// otherwise) and store it inside the span map.
193    fn handle_macro(&mut self, span: rustc_span::Span) -> bool {
194        if !span.from_expansion() {
195            return false;
196        }
197        // So if the `span` comes from a macro expansion, we need to get the original
198        // macro's `DefId`.
199        let mut data = span.ctxt().outer_expn_data();
200        let mut call_site = data.call_site;
201        // Macros can expand to code containing macros, which will in turn be expanded, etc.
202        // So the idea here is to "go up" until we're back to code that was generated from
203        // macro expansion so that we can get the `DefId` of the original macro that was at the
204        // origin of this expansion.
205        while call_site.from_expansion() {
206            data = call_site.ctxt().outer_expn_data();
207            call_site = data.call_site;
208        }
209
210        let macro_name = match data.kind {
211            ExpnKind::Macro(_, macro_name) => macro_name,
212            // Even though we don't handle this kind of macro, this `data` still comes from
213            // expansion so we return `true` so we don't go any deeper in this code.
214            _ => return true,
215        };
216        let link_from_src = match data.macro_def_id {
217            Some(macro_def_id) => {
218                if macro_def_id.is_local() {
219                    LinkFromSrc::Local(clean::Span::new(data.def_site))
220                } else {
221                    LinkFromSrc::External(macro_def_id)
222                }
223            }
224            None => return true,
225        };
226        let new_span = data.call_site;
227        let macro_name = macro_name.as_str();
228        // The "call_site" includes the whole macro with its "arguments". We only want
229        // the macro name.
230        let new_span = new_span.with_hi(new_span.lo() + BytePos(macro_name.len() as u32));
231        self.matches.insert(new_span.into(), link_from_src);
232        true
233    }
234}
235
236impl<'tcx> Visitor<'tcx> for SpanMapVisitor<'tcx> {
237    type NestedFilter = nested_filter::All;
238
    /// Returns the `TyCtxt` stored in this visitor.
    fn maybe_tcx(&mut self) -> Self::MaybeTyCtxt {
        self.tcx
    }
242
243    fn visit_nested_body(&mut self, body_id: hir::BodyId) -> Self::Result {
244        let maybe_typeck_results =
245            self.maybe_typeck_results.replace(LazyTypeckResults { body_id, cache: None });
246        self.visit_body(self.tcx.hir_body(body_id));
247        self.maybe_typeck_results = maybe_typeck_results;
248    }
249
250    fn visit_anon_const(&mut self, ct: &'tcx hir::AnonConst) {
251        // FIXME: Typeck'ing anon consts leads to ICEs in rustc if the parent body wasn't typeck'ed
252        //        yet. See #156418. Figure out what the best and proper solution for this is. Until
253        //        then, let's prevent `typeck` from being called on anon consts by not setting
254        //        `maybe_typeck_results` to `Some(_)`.
255        let maybe_typeck_results = self.maybe_typeck_results.take();
256        self.visit_body(self.tcx.hir_body(ct.body));
257        self.maybe_typeck_results = maybe_typeck_results;
258    }
259
260    fn visit_path(&mut self, path: &hir::Path<'tcx>, _id: HirId) {
261        if self.handle_macro(path.span) {
262            return;
263        }
264        self.handle_path(path, false);
265        intravisit::walk_path(self, path);
266    }
267
268    fn visit_qpath(&mut self, qpath: &QPath<'tcx>, id: HirId, _span: rustc_span::Span) {
269        match *qpath {
270            QPath::TypeRelative(qself, segment) => {
271                // FIXME: This doesn't work for paths in *types* since HIR ty lowering currently
272                //        doesn't write back the resolution of type-relative paths. Updating it to
273                //        do so should be a simple fix.
274                // FIXME: This obviously doesn't support item signatures / non-bodies. Sadly, rustc
275                //        currently doesn't keep around that information & thus can't provide an API
276                //        for it.
277                //        `ItemCtxt`s would need a place to write back the resolution of type-
278                //        dependent definitions. Ideally there was some sort of query keyed on the
279                //        `LocalDefId` of the owning item that returns some table with which we can
280                //        map the `HirId` to a `DefId`.
281                //        Of course, we could re-HIR-ty-lower such paths *here* if we were to extend
282                //        the public API of HIR analysis. However, I strongly advise against it as
283                //        it would be too much of a hack.
284                if let Some(typeck_results) = self.maybe_typeck_results() {
285                    let path = hir::Path {
286                        // We change the span to not include parens.
287                        span: segment.ident.span,
288                        res: typeck_results.qpath_res(qpath, id),
289                        segments: std::slice::from_ref(segment),
290                    };
291                    self.handle_path(&path, false);
292                }
293
294                rustc_ast::visit::try_visit!(self.visit_ty_unambig(qself));
295                self.visit_path_segment(segment);
296            }
297            QPath::Resolved(maybe_qself, path) => {
298                self.handle_path(path, true);
299
300                rustc_ast::visit::visit_opt!(self, visit_ty_unambig, maybe_qself);
301                if !self.handle_macro(path.span) {
302                    intravisit::walk_path(self, path);
303                }
304            }
305        }
306    }
307
308    fn visit_mod(&mut self, m: &'tcx Mod<'tcx>, span: rustc_span::Span, id: HirId) {
309        // To make the difference between "mod foo {}" and "mod foo;". In case we "import" another
310        // file, we want to link to it. Otherwise no need to create a link.
311        if !span.overlaps(m.spans.inner_span) {
312            // Now that we confirmed it's a file import, we want to get the span for the module
313            // name only and not all the "mod foo;".
314            if let Node::Item(item) = self.tcx.hir_node(id) {
315                let (ident, _) = item.expect_mod();
316                self.matches.insert(
317                    ident.span.into(),
318                    LinkFromSrc::Local(clean::Span::new(m.spans.inner_span)),
319                );
320            }
321        } else {
322            // If it's a "mod foo {}", we want to look to its documentation page.
323            self.extract_info_from_hir_id(id);
324        }
325        intravisit::walk_mod(self, m);
326    }
327
328    fn visit_expr(&mut self, expr: &'tcx hir::Expr<'tcx>) {
329        match expr.kind {
330            ExprKind::MethodCall(segment, ..) => {
331                if let Some(typeck_results) = self.maybe_typeck_results()
332                    && let Some(def_id) = typeck_results.type_dependent_def_id(expr.hir_id)
333                {
334                    self.matches.insert(segment.ident.span.into(), self.link_for_def(def_id));
335                }
336            }
337            // We don't want to go deeper into the macro.
338            _ if self.handle_macro(expr.span) => return,
339            _ => {}
340        }
341        intravisit::walk_expr(self, expr);
342    }
343
344    fn visit_item(&mut self, item: &'tcx Item<'tcx>) {
345        // We're no longer in a body since we've crossed an item boundary.
346        // Temporarily take away the typeck results which are only valid in bodies.
347        let maybe_typeck_results = self.maybe_typeck_results.take();
348
349        match item.kind {
350            ItemKind::Static(..)
351            | ItemKind::Const(..)
352            | ItemKind::Fn { .. }
353            | ItemKind::Macro(..)
354            | ItemKind::TyAlias(..)
355            | ItemKind::Enum(..)
356            | ItemKind::Struct(..)
357            | ItemKind::Union(..)
358            | ItemKind::Trait { .. }
359            | ItemKind::TraitAlias(..) => self.extract_info_from_hir_id(item.hir_id()),
360            ItemKind::Impl(_)
361            | ItemKind::Use(..)
362            | ItemKind::ExternCrate(..)
363            | ItemKind::ForeignMod { .. }
364            | ItemKind::GlobalAsm { .. }
365            // We already have "visit_mod" above so no need to check it here.
366            | ItemKind::Mod(..) => {}
367        }
368
369        intravisit::walk_item(self, item);
370
371        self.maybe_typeck_results = maybe_typeck_results;
372    }
373}
374
/// Lazily computed & cached [`ty::TypeckResults`].
struct LazyTypeckResults<'tcx> {
    /// The body whose typeck results are to be computed.
    body_id: hir::BodyId,
    /// The typeck results of `body_id`; `None` as long as they haven't been computed.
    cache: Option<&'tcx ty::TypeckResults<'tcx>>,
}