rustdoc/html/span_map.rs
1use std::path::{Path, PathBuf};
2
3use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
4use rustc_hir as hir;
5use rustc_hir::def::{DefKind, Res};
6use rustc_hir::def_id::{DefId, LOCAL_CRATE};
7use rustc_hir::intravisit::{self, Visitor, VisitorExt};
8use rustc_hir::{ExprKind, HirId, Item, ItemKind, Mod, Node, QPath};
9use rustc_middle::hir::nested_filter;
10use rustc_middle::ty::{self, TyCtxt};
11use rustc_span::{BytePos, ExpnKind};
12
13use crate::clean::{self, PrimitiveType, rustc_span};
14use crate::html::sources;
15
16/// This is a stripped down version of [`rustc_span::Span`] that only contains the start and end byte positions of the span.
17///
18/// Profiling showed that the `Span` interner was taking up a lot of the run-time when highlighting, and since we
19/// never actually use the context and parent that are stored in a normal `Span`, we can replace its usages with this
20/// one, which is much cheaper to construct.
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
22pub(crate) struct Span {
23 lo: BytePos,
24 hi: BytePos,
25}
26
27impl From<rustc_span::Span> for Span {
28 fn from(value: rustc_span::Span) -> Self {
29 Self { lo: value.lo(), hi: value.hi() }
30 }
31}
32
33impl Span {
34 pub(crate) fn lo(self) -> BytePos {
35 self.lo
36 }
37
38 pub(crate) fn hi(self) -> BytePos {
39 self.hi
40 }
41
42 pub(crate) fn with_lo(self, lo: BytePos) -> Self {
43 Self { lo, hi: self.hi() }
44 }
45
46 pub(crate) fn with_hi(self, hi: BytePos) -> Self {
47 Self { lo: self.lo(), hi }
48 }
49}
50
51pub(crate) const DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0) };
52
53/// This enum allows us to store two different kinds of information:
54///
55/// In case the `span` definition comes from the same crate, we can simply get the `span` and use
56/// it as is.
57///
58/// Otherwise, we store the definition `DefId` and will generate a link to the documentation page
59/// instead of the source code directly.
60#[derive(Debug)]
61pub(crate) enum LinkFromSrc {
62 Local(clean::Span),
63 External(DefId),
64 Primitive(PrimitiveType),
65 Doc(DefId),
66}
67
68/// This function will do at most two things:
69///
70/// 1. Generate a `span` correspondence map which links an item `span` to its definition `span`.
71/// 2. Collect the source code files.
72///
73/// It returns the source code files and the `span` correspondence map.
74///
75/// Note about the `span` correspondence map: the keys are actually `(lo, hi)` of `span`s. We don't
76/// need the `span` context later on, only their position, so instead of keeping a whole `Span`, we
77/// only keep the `lo` and `hi`.
78pub(crate) fn collect_spans_and_sources(
79 tcx: TyCtxt<'_>,
80 krate: &clean::Crate,
81 src_root: &Path,
82 include_sources: bool,
83 generate_link_to_definition: bool,
84) -> (FxIndexMap<PathBuf, String>, FxHashMap<Span, LinkFromSrc>) {
85 if include_sources {
86 let mut visitor =
87 SpanMapVisitor { tcx, maybe_typeck_results: None, matches: FxHashMap::default() };
88
89 if generate_link_to_definition {
90 tcx.hir_walk_toplevel_module(&mut visitor);
91 }
92 let sources = sources::collect_local_sources(tcx, src_root, krate);
93 (sources, visitor.matches)
94 } else {
95 (Default::default(), Default::default())
96 }
97}
98
99struct SpanMapVisitor<'tcx> {
100 pub(crate) tcx: TyCtxt<'tcx>,
101 pub(crate) maybe_typeck_results: Option<LazyTypeckResults<'tcx>>,
102 pub(crate) matches: FxHashMap<Span, LinkFromSrc>,
103}
104
105impl<'tcx> SpanMapVisitor<'tcx> {
106 /// Returns the typeck results of the current body if we're in one.
107 ///
108 /// This will typeck the body if it hasn't been already. Since rustdoc intentionally doesn't run
109 /// all semantic analysis passes on function bodies at the time of writing, this can lead to us
110 /// "suddenly" rejecting the user's code under `--generate-link-to-definition` while accepting
111 /// it if that flag isn't passed! So use this method sparingly and think about the consequences
112 /// including performance!
113 ///
114 /// This behavior is documented in the rustdoc book. Ideally, it wouldn't be that way but no
115 /// good solution has been found so far. Don't think about adding some sort of flag to rustc to
116 /// suppress diagnostic emission that would be unsound wrt. `ErrorGuaranteed`[^1] and generally
117 /// be quite hacky!
118 ///
119 /// [^1]: Historical context:
120 /// <https://github.com/rust-lang/rust/issues/69426#issuecomment-1019412352>.
121 fn maybe_typeck_results(&mut self) -> Option<&'tcx ty::TypeckResults<'tcx>> {
122 let results = self.maybe_typeck_results.as_mut()?;
123 let results = results.cache.get_or_insert_with(|| self.tcx.typeck_body(results.body_id));
124 Some(results)
125 }
126
127 fn link_for_def(&self, def_id: DefId) -> LinkFromSrc {
128 if def_id.is_local() {
129 LinkFromSrc::Local(rustc_span(def_id, self.tcx))
130 } else {
131 LinkFromSrc::External(def_id)
132 }
133 }
134
135 /// This function is where we handle `hir::Path` elements and add them into the "span map".
136 fn handle_path(&mut self, path: &hir::Path<'_>, only_use_last_segment: bool) {
137 match path.res {
138 // FIXME: Properly support type parameters. Note they resolve just fine. The issue is
139 // that our highlighter would then also linkify their *definition site* for some reason
140 // linking them to themselves. Const parameters don't exhibit this issue.
141 Res::Def(DefKind::TyParam, _) => {}
142 Res::Def(_, def_id) => {
143 // The segments can be empty for `use *;` in a non-crate-root scope in Rust 2015.
144 let span = path.segments.last().map_or(path.span, |seg| seg.ident.span);
145 // In case the path ends with generics, we remove them from the span.
146 let span = if only_use_last_segment {
147 if path.span.from_expansion() {
148 // For now we don't handle span from macro expansions so nothing to do here.
149 return;
150 }
151 span
152 } else {
153 // In `use` statements, the included item is not in the path segments. However,
154 // it doesn't matter because you can't have generics on `use` statements.
155 if path.span.contains(span) { path.span.with_hi(span.hi()) } else { path.span }
156 };
157 self.matches.insert(span.into(), self.link_for_def(def_id));
158 }
159 Res::Local(_) if let Some(span) = self.tcx.hir_res_span(path.res) => {
160 let path_span = if only_use_last_segment {
161 path.segments.last().unwrap().ident.span
162 } else {
163 path.span
164 };
165 self.matches.insert(path_span.into(), LinkFromSrc::Local(clean::Span::new(span)));
166 }
167 Res::PrimTy(p) => {
168 // FIXME: Doesn't handle "path-like" primitives like arrays or tuples.
169 self.matches
170 .insert(path.span.into(), LinkFromSrc::Primitive(PrimitiveType::from(p)));
171 }
172 _ => {}
173 }
174 }
175
176 /// Used to generate links on items' definition to go to their documentation page.
177 pub(crate) fn extract_info_from_hir_id(&mut self, hir_id: HirId) {
178 if let Node::Item(item) = self.tcx.hir_node(hir_id)
179 && let Some(span) = self.tcx.def_ident_span(item.owner_id)
180 {
181 let cspan = clean::Span::new(span);
182 // If the span isn't from the current crate, we ignore it.
183 if cspan.inner().is_dummy() || cspan.cnum(self.tcx.sess) != LOCAL_CRATE {
184 return;
185 }
186 self.matches.insert(span.into(), LinkFromSrc::Doc(item.owner_id.to_def_id()));
187 }
188 }
189
190 /// Adds the macro call into the span map. Returns `true` if the `span` was inside a macro
191 /// expansion, whether or not it was added to the span map.
192 ///
193 /// The idea for the macro support is to check if the current `Span` comes from expansion. If
194 /// so, we loop until we find the macro definition by using `outer_expn_data` in a loop.
195 /// Finally, we get the information about the macro itself (`span` if "local", `DefId`
196 /// otherwise) and store it inside the span map.
197 fn handle_macro(&mut self, span: rustc_span::Span) -> bool {
198 if !span.from_expansion() {
199 return false;
200 }
201 // So if the `span` comes from a macro expansion, we need to get the original
202 // macro's `DefId`.
203 let mut data = span.ctxt().outer_expn_data();
204 let mut call_site = data.call_site;
205 // Macros can expand to code containing macros, which will in turn be expanded, etc.
206 // So the idea here is to "go up" until we're back to code that was generated from
207 // macro expansion so that we can get the `DefId` of the original macro that was at the
208 // origin of this expansion.
209 while call_site.from_expansion() {
210 data = call_site.ctxt().outer_expn_data();
211 call_site = data.call_site;
212 }
213
214 let macro_name = match data.kind {
215 ExpnKind::Macro(_, macro_name) => macro_name,
216 // Even though we don't handle this kind of macro, this `data` still comes from
217 // expansion so we return `true` so we don't go any deeper in this code.
218 _ => return true,
219 };
220 let link_from_src = match data.macro_def_id {
221 Some(macro_def_id) => {
222 if macro_def_id.is_local() {
223 LinkFromSrc::Local(clean::Span::new(data.def_site))
224 } else {
225 LinkFromSrc::External(macro_def_id)
226 }
227 }
228 None => return true,
229 };
230 let new_span = data.call_site;
231 let macro_name = macro_name.as_str();
232 // The "call_site" includes the whole macro with its "arguments". We only want
233 // the macro name.
234 let new_span = new_span.with_hi(new_span.lo() + BytePos(macro_name.len() as u32));
235 self.matches.insert(new_span.into(), link_from_src);
236 true
237 }
238}
239
240impl<'tcx> Visitor<'tcx> for SpanMapVisitor<'tcx> {
241 type NestedFilter = nested_filter::All;
242
243 fn maybe_tcx(&mut self) -> Self::MaybeTyCtxt {
244 self.tcx
245 }
246
247 fn visit_nested_body(&mut self, body_id: hir::BodyId) -> Self::Result {
248 let maybe_typeck_results =
249 self.maybe_typeck_results.replace(LazyTypeckResults { body_id, cache: None });
250 self.visit_body(self.tcx.hir_body(body_id));
251 self.maybe_typeck_results = maybe_typeck_results;
252 }
253
254 fn visit_anon_const(&mut self, ct: &'tcx hir::AnonConst) {
255 // FIXME: Typeck'ing anon consts leads to ICEs in rustc if the parent body wasn't typeck'ed
256 // yet. See #156418. Figure out what the best and proper solution for this is. Until
257 // then, let's prevent `typeck` from being called on anon consts by not setting
258 // `maybe_typeck_results` to `Some(_)`.
259 let maybe_typeck_results = self.maybe_typeck_results.take();
260 self.visit_body(self.tcx.hir_body(ct.body));
261 self.maybe_typeck_results = maybe_typeck_results;
262 }
263
264 fn visit_path(&mut self, path: &hir::Path<'tcx>, _id: HirId) {
265 if self.handle_macro(path.span) {
266 return;
267 }
268 self.handle_path(path, false);
269 intravisit::walk_path(self, path);
270 }
271
272 fn visit_qpath(&mut self, qpath: &QPath<'tcx>, id: HirId, _span: rustc_span::Span) {
273 match *qpath {
274 QPath::TypeRelative(qself, segment) => {
275 // FIXME: This doesn't work for paths in *types* since HIR ty lowering currently
276 // doesn't write back the resolution of type-relative paths. Updating it to
277 // do so should be a simple fix.
278 // FIXME: This obviously doesn't support item signatures / non-bodies. Sadly, rustc
279 // currently doesn't keep around that information & thus can't provide an API
280 // for it.
281 // `ItemCtxt`s would need a place to write back the resolution of type-
282 // dependent definitions. Ideally there was some sort of query keyed on the
283 // `LocalDefId` of the owning item that returns some table with which we can
284 // map the `HirId` to a `DefId`.
285 // Of course, we could re-HIR-ty-lower such paths *here* if we were to extend
286 // the public API of HIR analysis. However, I strongly advise against it as
287 // it would be too much of a hack.
288 if let Some(typeck_results) = self.maybe_typeck_results() {
289 let path = hir::Path {
290 // We change the span to not include parens.
291 span: segment.ident.span,
292 res: typeck_results.qpath_res(qpath, id),
293 segments: std::slice::from_ref(segment),
294 };
295 self.handle_path(&path, false);
296 }
297
298 rustc_ast::visit::try_visit!(self.visit_ty_unambig(qself));
299 self.visit_path_segment(segment);
300 }
301 QPath::Resolved(maybe_qself, path) => {
302 self.handle_path(path, true);
303
304 rustc_ast::visit::visit_opt!(self, visit_ty_unambig, maybe_qself);
305 if !self.handle_macro(path.span) {
306 intravisit::walk_path(self, path);
307 }
308 }
309 }
310 }
311
312 fn visit_mod(&mut self, m: &'tcx Mod<'tcx>, span: rustc_span::Span, id: HirId) {
313 // To make the difference between "mod foo {}" and "mod foo;". In case we "import" another
314 // file, we want to link to it. Otherwise no need to create a link.
315 if !span.overlaps(m.spans.inner_span) {
316 // Now that we confirmed it's a file import, we want to get the span for the module
317 // name only and not all the "mod foo;".
318 if let Node::Item(item) = self.tcx.hir_node(id) {
319 let (ident, _) = item.expect_mod();
320 self.matches.insert(
321 ident.span.into(),
322 LinkFromSrc::Local(clean::Span::new(m.spans.inner_span)),
323 );
324 }
325 } else {
326 // If it's a "mod foo {}", we want to look to its documentation page.
327 self.extract_info_from_hir_id(id);
328 }
329 intravisit::walk_mod(self, m);
330 }
331
332 fn visit_expr(&mut self, expr: &'tcx hir::Expr<'tcx>) {
333 match expr.kind {
334 ExprKind::MethodCall(segment, ..) => {
335 if let Some(typeck_results) = self.maybe_typeck_results()
336 && let Some(def_id) = typeck_results.type_dependent_def_id(expr.hir_id)
337 {
338 self.matches.insert(segment.ident.span.into(), self.link_for_def(def_id));
339 }
340 }
341 // We don't want to go deeper into the macro.
342 _ if self.handle_macro(expr.span) => return,
343 _ => {}
344 }
345 intravisit::walk_expr(self, expr);
346 }
347
348 fn visit_item(&mut self, item: &'tcx Item<'tcx>) {
349 // We're no longer in a body since we've crossed an item boundary.
350 // Temporarily take away the typeck results which are only valid in bodies.
351 let maybe_typeck_results = self.maybe_typeck_results.take();
352
353 match item.kind {
354 ItemKind::Static(..)
355 | ItemKind::Const(..)
356 | ItemKind::Fn { .. }
357 | ItemKind::Macro(..)
358 | ItemKind::TyAlias(..)
359 | ItemKind::Enum(..)
360 | ItemKind::Struct(..)
361 | ItemKind::Union(..)
362 | ItemKind::Trait { .. }
363 | ItemKind::TraitAlias(..) => self.extract_info_from_hir_id(item.hir_id()),
364 ItemKind::Impl(_)
365 | ItemKind::Use(..)
366 | ItemKind::ExternCrate(..)
367 | ItemKind::ForeignMod { .. }
368 | ItemKind::GlobalAsm { .. }
369 // We already have "visit_mod" above so no need to check it here.
370 | ItemKind::Mod(..) => {}
371 }
372
373 intravisit::walk_item(self, item);
374
375 self.maybe_typeck_results = maybe_typeck_results;
376 }
377}
378
379/// Lazily computed & cached [`ty::TypeckResults`].
380struct LazyTypeckResults<'tcx> {
381 body_id: hir::BodyId,
382 cache: Option<&'tcx ty::TypeckResults<'tcx>>,
383}