Skip to main content

rustdoc/
scrape_examples.rs

1//! This module analyzes crates to find call sites that can serve as examples in the documentation.
2
3use std::fs;
4use std::path::PathBuf;
5
6use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
7use rustc_errors::DiagCtxtHandle;
8use rustc_hir as hir;
9use rustc_hir::intravisit::{self, Visitor};
10use rustc_macros::{Decodable, Encodable};
11use rustc_middle::hir::nested_filter;
12use rustc_middle::ty::{self, TyCtxt};
13use rustc_serialize::opaque::{FileEncoder, MemDecoder};
14use rustc_serialize::{Decodable, Encodable};
15use rustc_session::getopts;
16use rustc_span::def_id::{CrateNum, DefPathHash, LOCAL_CRATE};
17use rustc_span::edition::Edition;
18use rustc_span::{BytePos, FileName, SourceFile, Span};
19use tracing::{debug, trace, warn};
20
21use crate::html::render::Context;
22use crate::{clean, config, formats};
23
24#[derive(Debug, Clone)]
25pub(crate) struct ScrapeExamplesOptions {
26    output_path: PathBuf,
27    target_crates: Vec<String>,
28    pub(crate) scrape_tests: bool,
29}
30
31impl ScrapeExamplesOptions {
32    pub(crate) fn new(matches: &getopts::Matches, dcx: DiagCtxtHandle<'_>) -> Option<Self> {
33        let output_path = matches.opt_str("scrape-examples-output-path");
34        let target_crates = matches.opt_strs("scrape-examples-target-crate");
35        let scrape_tests = matches.opt_present("scrape-tests");
36        match (output_path, !target_crates.is_empty(), scrape_tests) {
37            (Some(output_path), true, _) => Some(ScrapeExamplesOptions {
38                output_path: PathBuf::from(output_path),
39                target_crates,
40                scrape_tests,
41            }),
42            (Some(_), false, _) | (None, true, _) => {
43                dcx.fatal(
44                    "must use --scrape-examples-output-path and --scrape-examples-target-crate \
45                     together",
46                );
47            }
48            (None, false, true) => {
49                dcx.fatal(
50                    "must use --scrape-examples-output-path and \
51                     --scrape-examples-target-crate with --scrape-tests",
52                );
53            }
54            (None, false, false) => None,
55        }
56    }
57}
58
59#[derive(Encodable, Decodable, Debug, Clone)]
60pub(crate) struct SyntaxRange {
61    pub(crate) byte_span: (u32, u32),
62    pub(crate) line_span: (usize, usize),
63}
64
65impl SyntaxRange {
66    fn new(span: rustc_span::Span, file: &SourceFile) -> Option<Self> {
67        let get_pos = |bytepos: BytePos| file.original_relative_byte_pos(bytepos).0;
68        let get_line = |bytepos: BytePos| file.lookup_line(file.relative_position(bytepos));
69
70        Some(SyntaxRange {
71            byte_span: (get_pos(span.lo()), get_pos(span.hi())),
72            line_span: (get_line(span.lo())?, get_line(span.hi())?),
73        })
74    }
75}
76
77#[derive(Encodable, Decodable, Debug, Clone)]
78pub(crate) struct CallLocation {
79    pub(crate) call_expr: SyntaxRange,
80    pub(crate) call_ident: SyntaxRange,
81    pub(crate) enclosing_item: SyntaxRange,
82}
83
84impl CallLocation {
85    fn new(
86        expr_span: rustc_span::Span,
87        ident_span: rustc_span::Span,
88        enclosing_item_span: rustc_span::Span,
89        source_file: &SourceFile,
90    ) -> Option<Self> {
91        Some(CallLocation {
92            call_expr: SyntaxRange::new(expr_span, source_file)?,
93            call_ident: SyntaxRange::new(ident_span, source_file)?,
94            enclosing_item: SyntaxRange::new(enclosing_item_span, source_file)?,
95        })
96    }
97}
98
99#[derive(Encodable, Decodable, Debug, Clone)]
100pub(crate) struct CallData {
101    pub(crate) locations: Vec<CallLocation>,
102    pub(crate) url: String,
103    pub(crate) display_name: String,
104    pub(crate) edition: Edition,
105    pub(crate) is_bin: bool,
106}
107
108pub(crate) type FnCallLocations = FxIndexMap<PathBuf, CallData>;
109pub(crate) type AllCallLocations = FxIndexMap<DefPathHash, FnCallLocations>;
110
111/// Visitor for traversing a crate and finding instances of function calls.
112struct FindCalls<'a, 'tcx> {
113    cx: Context<'tcx>,
114    target_crates: Vec<CrateNum>,
115    calls: &'a mut AllCallLocations,
116    bin_crate: bool,
117    call_ident_spans: FxHashSet<Span>,
118}
119
120impl<'a, 'tcx> Visitor<'tcx> for FindCalls<'a, 'tcx>
121where
122    'tcx: 'a,
123{
124    type NestedFilter = nested_filter::OnlyBodies;
125
126    fn maybe_tcx(&mut self) -> Self::MaybeTyCtxt {
127        self.cx.tcx()
128    }
129
130    fn visit_expr(&mut self, ex: &'tcx hir::Expr<'tcx>) {
131        intravisit::walk_expr(self, ex);
132
133        let tcx = self.cx.tcx();
134
135        // If we visit an item that contains an expression outside a function body,
136        // then we need to exit before calling typeck (which will panic). See
137        // test/run-make/rustdoc-scrape-examples-invalid-expr for an example.
138        if tcx.hir_maybe_body_owned_by(ex.hir_id.owner.def_id).is_none() {
139            return;
140        }
141
142        // Get type of function if expression is a function call
143        let (ty, call_span, ident_span) = match ex.kind {
144            hir::ExprKind::Call(f, _) => {
145                let types = tcx.typeck(ex.hir_id.owner.def_id);
146
147                if let Some(ty) = types.node_type_opt(f.hir_id) {
148                    (ty, ex.span, f.span)
149                } else {
150                    trace!("node_type_opt({}) = None", f.hir_id);
151                    return;
152                }
153            }
154            hir::ExprKind::MethodCall(path, _, _, call_span) => {
155                let types = tcx.typeck(ex.hir_id.owner.def_id);
156                let Some(def_id) = types.type_dependent_def_id(ex.hir_id) else {
157                    trace!("type_dependent_def_id({}) = None", ex.hir_id);
158                    return;
159                };
160
161                let ident_span = path.ident.span;
162                (tcx.type_of(def_id).instantiate_identity(), call_span, ident_span)
163            }
164            _ => {
165                return;
166            }
167        };
168
169        if !self.call_ident_spans.insert(ident_span) {
170            return;
171        }
172
173        // If this span comes from a macro expansion, then the source code may not actually show
174        // a use of the given item, so it would be a poor example. Hence, we skip all uses in
175        // macros.
176        if call_span.from_expansion() {
177            trace!("Rejecting expr from macro: {call_span:?}");
178            return;
179        }
180
181        // If the enclosing item has a span coming from a proc macro, then we also don't want to
182        // include the example.
183        let enclosing_item_span = tcx.hir_span_with_body(tcx.hir_get_parent_item(ex.hir_id).into());
184        if enclosing_item_span.from_expansion() {
185            trace!("Rejecting expr ({call_span:?}) from macro item: {enclosing_item_span:?}");
186            return;
187        }
188
189        // If the enclosing item doesn't actually enclose the call, this means we probably have a
190        // weird macro issue even though the spans aren't tagged as being from an expansion.
191        if !enclosing_item_span.contains(call_span) {
192            warn!(
193                "Attempted to scrape call at [{call_span:?}] whose enclosing item \
194                 [{enclosing_item_span:?}] doesn't contain the span of the call."
195            );
196            return;
197        }
198
199        // Similarly for the call w/ the function ident.
200        if !call_span.contains(ident_span) {
201            warn!(
202                "Attempted to scrape call at [{call_span:?}] whose identifier [{ident_span:?}] was \
203                 not contained in the span of the call."
204            );
205            return;
206        }
207
208        // Save call site if the function resolves to a concrete definition
209        if let ty::FnDef(def_id, _) = ty.kind() {
210            if self.target_crates.iter().all(|krate| *krate != def_id.krate) {
211                trace!("Rejecting expr from crate not being documented: {call_span:?}");
212                return;
213            }
214
215            let source_map = tcx.sess.source_map();
216            let file = source_map.lookup_char_pos(call_span.lo()).file;
217            let file_path = match file.name.clone() {
218                FileName::Real(real_filename) => real_filename.into_local_path(),
219                _ => None,
220            };
221
222            if let Some(file_path) = file_path {
223                let abs_path = match fs::canonicalize(file_path.clone()) {
224                    Ok(abs_path) => abs_path,
225                    Err(_) => {
226                        trace!("Could not canonicalize file path: {}", file_path.display());
227                        return;
228                    }
229                };
230
231                let cx = &self.cx;
232                let clean_span = crate::clean::types::Span::new(call_span);
233                let url = match cx.href_from_span(clean_span, false) {
234                    Some(url) => url,
235                    None => {
236                        trace!(
237                            "Rejecting expr ({call_span:?}) whose clean span ({clean_span:?}) \
238                             cannot be turned into a link"
239                        );
240                        return;
241                    }
242                };
243
244                let mk_call_data = || {
245                    let display_name = file_path.display().to_string();
246                    let edition = call_span.edition();
247                    let is_bin = self.bin_crate;
248
249                    CallData { locations: Vec::new(), url, display_name, edition, is_bin }
250                };
251
252                let fn_key = tcx.def_path_hash(*def_id);
253                let fn_entries = self.calls.entry(fn_key).or_default();
254
255                trace!("Including expr: {call_span:?}");
256                let enclosing_item_span =
257                    source_map.span_extend_to_prev_char(enclosing_item_span, '\n', false);
258                let location =
259                    match CallLocation::new(call_span, ident_span, enclosing_item_span, &file) {
260                        Some(location) => location,
261                        None => {
262                            trace!("Could not get serializable call location for {call_span:?}");
263                            return;
264                        }
265                    };
266                fn_entries.entry(abs_path).or_insert_with(mk_call_data).locations.push(location);
267            }
268        }
269    }
270}
271
272pub(crate) fn run(
273    krate: clean::Crate,
274    mut renderopts: config::RenderOptions,
275    cache: formats::cache::Cache,
276    tcx: TyCtxt<'_>,
277    options: ScrapeExamplesOptions,
278    bin_crate: bool,
279) {
280    let inner = move || -> Result<(), String> {
281        let emit_dep_info = renderopts.dep_info().is_some();
282        // Generates source files for examples
283        renderopts.no_emit_shared = true;
284        let (cx, _) = Context::init(krate, renderopts, cache, tcx, Default::default())
285            .map_err(|e| e.to_string())?;
286
287        // Collect CrateIds corresponding to provided target crates
288        // If two different versions of the crate in the dependency tree, then examples will be
289        // collected from both.
290        let all_crates = tcx
291            .crates(())
292            .iter()
293            .chain([&LOCAL_CRATE])
294            .map(|crate_num| (crate_num, tcx.crate_name(*crate_num)))
295            .collect::<Vec<_>>();
296        let target_crates = options
297            .target_crates
298            .into_iter()
299            .flat_map(|target| all_crates.iter().filter(move |(_, name)| name.as_str() == target))
300            .map(|(crate_num, _)| **crate_num)
301            .collect::<Vec<_>>();
302
303        debug!("All crates in TyCtxt: {all_crates:?}");
304        debug!("Scrape examples target_crates: {target_crates:?}");
305
306        // Run call-finder on all items
307        let mut calls = FxIndexMap::default();
308        let mut finder = FindCalls {
309            calls: &mut calls,
310            cx,
311            target_crates,
312            bin_crate,
313            call_ident_spans: FxHashSet::default(),
314        };
315        tcx.hir_visit_all_item_likes_in_crate(&mut finder);
316
317        // The visitor might have found a type error, which we need to
318        // promote to a fatal error
319        if tcx.dcx().has_errors().is_some() {
320            return Err(String::from("Compilation failed, aborting rustdoc"));
321        }
322
323        // Sort call locations within a given file in document order
324        for fn_calls in calls.values_mut() {
325            for file_calls in fn_calls.values_mut() {
326                file_calls.locations.sort_by_key(|loc| loc.call_expr.byte_span.0);
327            }
328        }
329
330        // Save output to provided path
331        let mut encoder = FileEncoder::new(options.output_path).map_err(|e| e.to_string())?;
332        calls.encode(&mut encoder);
333        encoder.finish().map_err(|(_path, e)| e.to_string())?;
334
335        if emit_dep_info {
336            rustc_interface::passes::write_dep_info(tcx);
337        }
338
339        Ok(())
340    };
341
342    if let Err(e) = inner() {
343        tcx.dcx().fatal(e);
344    }
345}
346
347// Note: the DiagCtxt must be passed in explicitly because sess isn't available while parsing
348// options.
349pub(crate) fn load_call_locations(
350    with_examples: Vec<String>,
351    dcx: DiagCtxtHandle<'_>,
352    loaded_paths: &mut Vec<PathBuf>,
353) -> AllCallLocations {
354    let mut all_calls: AllCallLocations = FxIndexMap::default();
355    for path in with_examples {
356        loaded_paths.push(path.clone().into());
357        let bytes = match fs::read(&path) {
358            Ok(bytes) => bytes,
359            Err(e) => dcx.fatal(format!("failed to load examples: {e}")),
360        };
361        let Ok(mut decoder) = MemDecoder::new(&bytes, 0) else {
362            dcx.fatal(format!("Corrupt metadata encountered in {path}"))
363        };
364        let calls = AllCallLocations::decode(&mut decoder);
365
366        for (function, fn_calls) in calls.into_iter() {
367            all_calls.entry(function).or_default().extend(fn_calls.into_iter());
368        }
369    }
370
371    all_calls
372}