rustdoc/html/span_map.rs
1use std::path::{Path, PathBuf};
2
3use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
4use rustc_hir as hir;
5use rustc_hir::def::{DefKind, Res};
6use rustc_hir::def_id::{DefId, LOCAL_CRATE};
7use rustc_hir::intravisit::{self, Visitor, VisitorExt};
8use rustc_hir::{ExprKind, HirId, Item, ItemKind, Mod, Node, QPath};
9use rustc_middle::hir::nested_filter;
10use rustc_middle::ty::{self, TyCtxt};
11use rustc_span::{BytePos, ExpnKind};
12
13use crate::clean::{self, PrimitiveType, rustc_span};
14use crate::html::sources;
15
16/// This is a stripped down version of [`rustc_span::Span`] that only contains the start and end byte positions of the span.
17///
18/// Profiling showed that the `Span` interner was taking up a lot of the run-time when highlighting, and since we
19/// never actually use the context and parent that are stored in a normal `Span`, we can replace its usages with this
20/// one, which is much cheaper to construct.
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
22pub(crate) struct Span {
23 lo: BytePos,
24 hi: BytePos,
25}
26
27impl From<rustc_span::Span> for Span {
28 fn from(value: rustc_span::Span) -> Self {
29 Self { lo: value.lo(), hi: value.hi() }
30 }
31}
32
33impl Span {
34 pub(crate) fn lo(self) -> BytePos {
35 self.lo
36 }
37
38 pub(crate) fn hi(self) -> BytePos {
39 self.hi
40 }
41
42 pub(crate) fn with_lo(self, lo: BytePos) -> Self {
43 Self { lo, hi: self.hi() }
44 }
45
46 pub(crate) fn with_hi(self, hi: BytePos) -> Self {
47 Self { lo: self.lo(), hi }
48 }
49}
50
51pub(crate) const DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0) };
52
53/// This enum allows us to store two different kinds of information:
54///
55/// In case the `span` definition comes from the same crate, we can simply get the `span` and use
56/// it as is.
57///
58/// Otherwise, we store the definition `DefId` and will generate a link to the documentation page
59/// instead of the source code directly.
60#[derive(Debug)]
61pub(crate) enum LinkFromSrc {
62 Local(clean::Span),
63 External(DefId),
64 Primitive(PrimitiveType),
65 Doc(DefId),
66}
67
68/// This function will do at most two things:
69///
70/// 1. Generate a `span` correspondence map which links an item `span` to its definition `span`.
71/// 2. Collect the source code files.
72///
73/// It returns the source code files and the `span` correspondence map.
74///
75/// Note about the `span` correspondence map: the keys are actually `(lo, hi)` of `span`s. We don't
76/// need the `span` context later on, only their position, so instead of keeping a whole `Span`, we
77/// only keep the `lo` and `hi`.
78pub(crate) fn collect_spans_and_sources(
79 tcx: TyCtxt<'_>,
80 krate: &clean::Crate,
81 src_root: &Path,
82 include_sources: bool,
83 generate_link_to_definition: bool,
84) -> (FxIndexMap<PathBuf, String>, FxHashMap<Span, LinkFromSrc>) {
85 if include_sources {
86 let mut visitor =
87 SpanMapVisitor { tcx, maybe_typeck_results: None, matches: FxHashMap::default() };
88
89 if generate_link_to_definition {
90 tcx.hir_walk_toplevel_module(&mut visitor);
91 }
92 let sources = sources::collect_local_sources(tcx, src_root, krate);
93 (sources, visitor.matches)
94 } else {
95 (Default::default(), Default::default())
96 }
97}
98
99struct SpanMapVisitor<'tcx> {
100 pub(crate) tcx: TyCtxt<'tcx>,
101 pub(crate) maybe_typeck_results: Option<LazyTypeckResults<'tcx>>,
102 pub(crate) matches: FxHashMap<Span, LinkFromSrc>,
103}
104
105impl<'tcx> SpanMapVisitor<'tcx> {
106 /// Returns the typeck results of the current body if we're in one.
107 ///
108 /// This will typeck the body if it hasn't been already. Since rustdoc intentionally doesn't run
109 /// all semantic analysis passes on function bodies at the time of writing, this can lead to us
110 /// "suddenly" rejecting the user's code under `--generate-link-to-definition` while accepting
111 /// it if that flag isn't passed! So use this method sparingly and think about the consequences
112 /// including performance!
113 ///
114 /// This behavior is documented in the rustdoc book. Ideally, it wouldn't be that way but no
115 /// good solution has been found so far. Don't think about adding some sort of flag to rustc to
116 /// suppress diagnostic emission that would be unsound wrt. `ErrorGuaranteed`[^1] and generally
117 /// be quite hacky!
118 ///
119 /// [^1]: Historical context:
120 /// <https://github.com/rust-lang/rust/issues/69426#issuecomment-1019412352>.
121 fn maybe_typeck_results(&mut self) -> Option<&'tcx ty::TypeckResults<'tcx>> {
122 let results = self.maybe_typeck_results.as_mut()?;
123 let results = results.cache.get_or_insert_with(|| self.tcx.typeck_body(results.body_id));
124 Some(results)
125 }
126
127 fn link_for_def(&self, def_id: DefId) -> LinkFromSrc {
128 if def_id.is_local() {
129 LinkFromSrc::Local(rustc_span(def_id, self.tcx))
130 } else {
131 LinkFromSrc::External(def_id)
132 }
133 }
134
135 /// This function is where we handle `hir::Path` elements and add them into the "span map".
136 fn handle_path(&mut self, path: &hir::Path<'_>, only_use_last_segment: bool) {
137 match path.res {
138 // FIXME: Properly support type parameters. Note they resolve just fine. The issue is
139 // that our highlighter would then also linkify their *definition site* for some reason
140 // linking them to themselves. Const parameters don't exhibit this issue.
141 Res::Def(DefKind::TyParam, _) => {}
142 Res::Def(_, def_id) => {
143 // The segments can be empty for `use *;` in a non-crate-root scope in Rust 2015.
144 let span = path.segments.last().map_or(path.span, |seg| seg.ident.span);
145 // In case the path ends with generics, we remove them from the span.
146 let span = if only_use_last_segment {
147 span
148 } else {
149 // In `use` statements, the included item is not in the path segments. However,
150 // it doesn't matter because you can't have generics on `use` statements.
151 if path.span.contains(span) { path.span.with_hi(span.hi()) } else { path.span }
152 };
153 self.matches.insert(span.into(), self.link_for_def(def_id));
154 }
155 Res::Local(_) if let Some(span) = self.tcx.hir_res_span(path.res) => {
156 let path_span = if only_use_last_segment {
157 path.segments.last().unwrap().ident.span
158 } else {
159 path.span
160 };
161 self.matches.insert(path_span.into(), LinkFromSrc::Local(clean::Span::new(span)));
162 }
163 Res::PrimTy(p) => {
164 // FIXME: Doesn't handle "path-like" primitives like arrays or tuples.
165 self.matches
166 .insert(path.span.into(), LinkFromSrc::Primitive(PrimitiveType::from(p)));
167 }
168 _ => {}
169 }
170 }
171
172 /// Used to generate links on items' definition to go to their documentation page.
173 pub(crate) fn extract_info_from_hir_id(&mut self, hir_id: HirId) {
174 if let Node::Item(item) = self.tcx.hir_node(hir_id)
175 && let Some(span) = self.tcx.def_ident_span(item.owner_id)
176 {
177 let cspan = clean::Span::new(span);
178 // If the span isn't from the current crate, we ignore it.
179 if cspan.inner().is_dummy() || cspan.cnum(self.tcx.sess) != LOCAL_CRATE {
180 return;
181 }
182 self.matches.insert(span.into(), LinkFromSrc::Doc(item.owner_id.to_def_id()));
183 }
184 }
185
186 /// Adds the macro call into the span map. Returns `true` if the `span` was inside a macro
187 /// expansion, whether or not it was added to the span map.
188 ///
189 /// The idea for the macro support is to check if the current `Span` comes from expansion. If
190 /// so, we loop until we find the macro definition by using `outer_expn_data` in a loop.
191 /// Finally, we get the information about the macro itself (`span` if "local", `DefId`
192 /// otherwise) and store it inside the span map.
193 fn handle_macro(&mut self, span: rustc_span::Span) -> bool {
194 if !span.from_expansion() {
195 return false;
196 }
197 // So if the `span` comes from a macro expansion, we need to get the original
198 // macro's `DefId`.
199 let mut data = span.ctxt().outer_expn_data();
200 let mut call_site = data.call_site;
201 // Macros can expand to code containing macros, which will in turn be expanded, etc.
202 // So the idea here is to "go up" until we're back to code that was generated from
203 // macro expansion so that we can get the `DefId` of the original macro that was at the
204 // origin of this expansion.
205 while call_site.from_expansion() {
206 data = call_site.ctxt().outer_expn_data();
207 call_site = data.call_site;
208 }
209
210 let macro_name = match data.kind {
211 ExpnKind::Macro(_, macro_name) => macro_name,
212 // Even though we don't handle this kind of macro, this `data` still comes from
213 // expansion so we return `true` so we don't go any deeper in this code.
214 _ => return true,
215 };
216 let link_from_src = match data.macro_def_id {
217 Some(macro_def_id) => {
218 if macro_def_id.is_local() {
219 LinkFromSrc::Local(clean::Span::new(data.def_site))
220 } else {
221 LinkFromSrc::External(macro_def_id)
222 }
223 }
224 None => return true,
225 };
226 let new_span = data.call_site;
227 let macro_name = macro_name.as_str();
228 // The "call_site" includes the whole macro with its "arguments". We only want
229 // the macro name.
230 let new_span = new_span.with_hi(new_span.lo() + BytePos(macro_name.len() as u32));
231 self.matches.insert(new_span.into(), link_from_src);
232 true
233 }
234}
235
236impl<'tcx> Visitor<'tcx> for SpanMapVisitor<'tcx> {
237 type NestedFilter = nested_filter::All;
238
239 fn maybe_tcx(&mut self) -> Self::MaybeTyCtxt {
240 self.tcx
241 }
242
243 fn visit_nested_body(&mut self, body_id: hir::BodyId) -> Self::Result {
244 let maybe_typeck_results =
245 self.maybe_typeck_results.replace(LazyTypeckResults { body_id, cache: None });
246 self.visit_body(self.tcx.hir_body(body_id));
247 self.maybe_typeck_results = maybe_typeck_results;
248 }
249
250 fn visit_anon_const(&mut self, ct: &'tcx hir::AnonConst) {
251 // FIXME: Typeck'ing anon consts leads to ICEs in rustc if the parent body wasn't typeck'ed
252 // yet. See #156418. Figure out what the best and proper solution for this is. Until
253 // then, let's prevent `typeck` from being called on anon consts by not setting
254 // `maybe_typeck_results` to `Some(_)`.
255 let maybe_typeck_results = self.maybe_typeck_results.take();
256 self.visit_body(self.tcx.hir_body(ct.body));
257 self.maybe_typeck_results = maybe_typeck_results;
258 }
259
260 fn visit_path(&mut self, path: &hir::Path<'tcx>, _id: HirId) {
261 if self.handle_macro(path.span) {
262 return;
263 }
264 self.handle_path(path, false);
265 intravisit::walk_path(self, path);
266 }
267
268 fn visit_qpath(&mut self, qpath: &QPath<'tcx>, id: HirId, _span: rustc_span::Span) {
269 match *qpath {
270 QPath::TypeRelative(qself, segment) => {
271 // FIXME: This doesn't work for paths in *types* since HIR ty lowering currently
272 // doesn't write back the resolution of type-relative paths. Updating it to
273 // do so should be a simple fix.
274 // FIXME: This obviously doesn't support item signatures / non-bodies. Sadly, rustc
275 // currently doesn't keep around that information & thus can't provide an API
276 // for it.
277 // `ItemCtxt`s would need a place to write back the resolution of type-
278 // dependent definitions. Ideally there was some sort of query keyed on the
279 // `LocalDefId` of the owning item that returns some table with which we can
280 // map the `HirId` to a `DefId`.
281 // Of course, we could re-HIR-ty-lower such paths *here* if we were to extend
282 // the public API of HIR analysis. However, I strongly advise against it as
283 // it would be too much of a hack.
284 if let Some(typeck_results) = self.maybe_typeck_results() {
285 let path = hir::Path {
286 // We change the span to not include parens.
287 span: segment.ident.span,
288 res: typeck_results.qpath_res(qpath, id),
289 segments: std::slice::from_ref(segment),
290 };
291 self.handle_path(&path, false);
292 }
293
294 rustc_ast::visit::try_visit!(self.visit_ty_unambig(qself));
295 self.visit_path_segment(segment);
296 }
297 QPath::Resolved(maybe_qself, path) => {
298 self.handle_path(path, true);
299
300 rustc_ast::visit::visit_opt!(self, visit_ty_unambig, maybe_qself);
301 if !self.handle_macro(path.span) {
302 intravisit::walk_path(self, path);
303 }
304 }
305 }
306 }
307
308 fn visit_mod(&mut self, m: &'tcx Mod<'tcx>, span: rustc_span::Span, id: HirId) {
309 // To make the difference between "mod foo {}" and "mod foo;". In case we "import" another
310 // file, we want to link to it. Otherwise no need to create a link.
311 if !span.overlaps(m.spans.inner_span) {
312 // Now that we confirmed it's a file import, we want to get the span for the module
313 // name only and not all the "mod foo;".
314 if let Node::Item(item) = self.tcx.hir_node(id) {
315 let (ident, _) = item.expect_mod();
316 self.matches.insert(
317 ident.span.into(),
318 LinkFromSrc::Local(clean::Span::new(m.spans.inner_span)),
319 );
320 }
321 } else {
322 // If it's a "mod foo {}", we want to look to its documentation page.
323 self.extract_info_from_hir_id(id);
324 }
325 intravisit::walk_mod(self, m);
326 }
327
328 fn visit_expr(&mut self, expr: &'tcx hir::Expr<'tcx>) {
329 match expr.kind {
330 ExprKind::MethodCall(segment, ..) => {
331 if let Some(typeck_results) = self.maybe_typeck_results()
332 && let Some(def_id) = typeck_results.type_dependent_def_id(expr.hir_id)
333 {
334 self.matches.insert(segment.ident.span.into(), self.link_for_def(def_id));
335 }
336 }
337 // We don't want to go deeper into the macro.
338 _ if self.handle_macro(expr.span) => return,
339 _ => {}
340 }
341 intravisit::walk_expr(self, expr);
342 }
343
344 fn visit_item(&mut self, item: &'tcx Item<'tcx>) {
345 // We're no longer in a body since we've crossed an item boundary.
346 // Temporarily take away the typeck results which are only valid in bodies.
347 let maybe_typeck_results = self.maybe_typeck_results.take();
348
349 match item.kind {
350 ItemKind::Static(..)
351 | ItemKind::Const(..)
352 | ItemKind::Fn { .. }
353 | ItemKind::Macro(..)
354 | ItemKind::TyAlias(..)
355 | ItemKind::Enum(..)
356 | ItemKind::Struct(..)
357 | ItemKind::Union(..)
358 | ItemKind::Trait { .. }
359 | ItemKind::TraitAlias(..) => self.extract_info_from_hir_id(item.hir_id()),
360 ItemKind::Impl(_)
361 | ItemKind::Use(..)
362 | ItemKind::ExternCrate(..)
363 | ItemKind::ForeignMod { .. }
364 | ItemKind::GlobalAsm { .. }
365 // We already have "visit_mod" above so no need to check it here.
366 | ItemKind::Mod(..) => {}
367 }
368
369 intravisit::walk_item(self, item);
370
371 self.maybe_typeck_results = maybe_typeck_results;
372 }
373}
374
375/// Lazily computed & cached [`ty::TypeckResults`].
376struct LazyTypeckResults<'tcx> {
377 body_id: hir::BodyId,
378 cache: Option<&'tcx ty::TypeckResults<'tcx>>,
379}