rustdoc/doctest/
make.rs

1//! Logic for transforming the raw code given by the user into something actually
2//! runnable, e.g. by adding a `main` function if it doesn't already exist.
3
4use std::io;
5use std::sync::Arc;
6
7use rustc_ast as ast;
8use rustc_errors::emitter::stderr_destination;
9use rustc_errors::{ColorConfig, FatalError};
10use rustc_parse::new_parser_from_source_str;
11use rustc_parse::parser::attr::InnerAttrPolicy;
12use rustc_session::parse::ParseSess;
13use rustc_span::FileName;
14use rustc_span::edition::Edition;
15use rustc_span::source_map::SourceMap;
16use rustc_span::symbol::sym;
17use tracing::debug;
18
19use super::GlobalTestOptions;
20use crate::html::markdown::LangString;
21
/// Everything rustdoc has learned about a single doctest, kept so that the final doctest
/// source code can be generated from it.
pub(crate) struct DocTestBuilder {
    /// Whether the stderr diagnostics emitter reported color support when the doctest was
    /// parsed. Always `false` for a doctest that could not be analyzed.
    pub(crate) supports_color: bool,
    /// `true` when no `extern crate` item needs to be injected, because the doctest already
    /// declares the crate or because no crate name was given.
    pub(crate) already_has_extern_crate: bool,
    /// Whether the doctest provides its own `main` function.
    pub(crate) has_main_fn: bool,
    /// Leading lines of the doctest that are treated as crate-level attributes.
    pub(crate) crate_attrs: String,
    /// Leading lint-level attributes (`allow`, `warn`, `deny`, `forbid`) of the doctest.
    ///
    /// If this is a merged doctest, they will be put into `everything_else`, otherwise they
    /// will be put alongside `crate_attrs`.
    pub(crate) maybe_crate_attrs: String,
    /// The `extern crate` lines found right after the leading attributes.
    pub(crate) crates: String,
    /// The rest of the doctest, i.e. the actual test code.
    pub(crate) everything_else: String,
    /// Identifier used to name the generated inner function. `None` means the code is being
    /// generated for a code example "run" link.
    pub(crate) test_id: Option<String>,
    /// Set when the doctest could not be partitioned or parsed at all; such a doctest is
    /// passed through unchanged when code is generated.
    pub(crate) failed_ast: bool,
    /// Whether this doctest may be merged with other doctests.
    pub(crate) can_be_merged: bool,
}
38
39impl DocTestBuilder {
40    pub(crate) fn new(
41        source: &str,
42        crate_name: Option<&str>,
43        edition: Edition,
44        can_merge_doctests: bool,
45        // If `test_id` is `None`, it means we're generating code for a code example "run" link.
46        test_id: Option<String>,
47        lang_str: Option<&LangString>,
48    ) -> Self {
49        let can_merge_doctests = can_merge_doctests
50            && lang_str.is_some_and(|lang_str| {
51                !lang_str.compile_fail && !lang_str.test_harness && !lang_str.standalone_crate
52            });
53
54        let Some(SourceInfo { crate_attrs, maybe_crate_attrs, crates, everything_else }) =
55            partition_source(source, edition)
56        else {
57            return Self::invalid(
58                String::new(),
59                String::new(),
60                String::new(),
61                source.to_string(),
62                test_id,
63            );
64        };
65
66        // Uses librustc_ast to parse the doctest and find if there's a main fn and the extern
67        // crate already is included.
68        let Ok((
69            ParseSourceInfo {
70                has_main_fn,
71                found_extern_crate,
72                supports_color,
73                has_global_allocator,
74                has_macro_def,
75                ..
76            },
77            failed_ast,
78        )) = check_for_main_and_extern_crate(
79            crate_name,
80            source,
81            &everything_else,
82            &crates,
83            edition,
84            can_merge_doctests,
85        )
86        else {
87            // If the parser panicked due to a fatal error, pass the test code through unchanged.
88            // The error will be reported during compilation.
89            return Self::invalid(crate_attrs, maybe_crate_attrs, crates, everything_else, test_id);
90        };
91        // If the AST returned an error, we don't want this doctest to be merged with the
92        // others. Same if it contains `#[feature]` or `#[no_std]`.
93        let can_be_merged = can_merge_doctests
94            && !failed_ast
95            && !has_global_allocator
96            && crate_attrs.is_empty()
97            // If this is a merged doctest and a defined macro uses `$crate`, then the path will
98            // not work, so better not put it into merged doctests.
99            && !(has_macro_def && everything_else.contains("$crate"));
100        Self {
101            supports_color,
102            has_main_fn,
103            crate_attrs,
104            maybe_crate_attrs,
105            crates,
106            everything_else,
107            already_has_extern_crate: found_extern_crate,
108            test_id,
109            failed_ast: false,
110            can_be_merged,
111        }
112    }
113
114    fn invalid(
115        crate_attrs: String,
116        maybe_crate_attrs: String,
117        crates: String,
118        everything_else: String,
119        test_id: Option<String>,
120    ) -> Self {
121        Self {
122            supports_color: false,
123            has_main_fn: false,
124            crate_attrs,
125            maybe_crate_attrs,
126            crates,
127            everything_else,
128            already_has_extern_crate: false,
129            test_id,
130            failed_ast: true,
131            can_be_merged: false,
132        }
133    }
134
135    /// Transforms a test into code that can be compiled into a Rust binary, and returns the number of
136    /// lines before the test code begins.
137    pub(crate) fn generate_unique_doctest(
138        &self,
139        test_code: &str,
140        dont_insert_main: bool,
141        opts: &GlobalTestOptions,
142        crate_name: Option<&str>,
143    ) -> (String, usize) {
144        if self.failed_ast {
145            // If the AST failed to compile, no need to go generate a complete doctest, the error
146            // will be better this way.
147            return (test_code.to_string(), 0);
148        }
149        let mut line_offset = 0;
150        let mut prog = String::new();
151        let everything_else = self.everything_else.trim();
152        if opts.attrs.is_empty() {
153            // If there aren't any attributes supplied by #![doc(test(attr(...)))], then allow some
154            // lints that are commonly triggered in doctests. The crate-level test attributes are
155            // commonly used to make tests fail in case they trigger warnings, so having this there in
156            // that case may cause some tests to pass when they shouldn't have.
157            prog.push_str("#![allow(unused)]\n");
158            line_offset += 1;
159        }
160
161        // Next, any attributes that came from the crate root via #![doc(test(attr(...)))].
162        for attr in &opts.attrs {
163            prog.push_str(&format!("#![{attr}]\n"));
164            line_offset += 1;
165        }
166
167        // Now push any outer attributes from the example, assuming they
168        // are intended to be crate attributes.
169        prog.push_str(&self.crate_attrs);
170        prog.push_str(&self.maybe_crate_attrs);
171        prog.push_str(&self.crates);
172
173        // Don't inject `extern crate std` because it's already injected by the
174        // compiler.
175        if !self.already_has_extern_crate &&
176            !opts.no_crate_inject &&
177            let Some(crate_name) = crate_name &&
178            crate_name != "std" &&
179            // Don't inject `extern crate` if the crate is never used.
180            // NOTE: this is terribly inaccurate because it doesn't actually
181            // parse the source, but only has false positives, not false
182            // negatives.
183            test_code.contains(crate_name)
184        {
185            // rustdoc implicitly inserts an `extern crate` item for the own crate
186            // which may be unused, so we need to allow the lint.
187            prog.push_str("#[allow(unused_extern_crates)]\n");
188
189            prog.push_str(&format!("extern crate r#{crate_name};\n"));
190            line_offset += 1;
191        }
192
193        // FIXME: This code cannot yet handle no_std test cases yet
194        if dont_insert_main || self.has_main_fn || prog.contains("![no_std]") {
195            prog.push_str(everything_else);
196        } else {
197            let returns_result = everything_else.ends_with("(())");
198            // Give each doctest main function a unique name.
199            // This is for example needed for the tooling around `-C instrument-coverage`.
200            let inner_fn_name = if let Some(ref test_id) = self.test_id {
201                format!("_doctest_main_{test_id}")
202            } else {
203                "_inner".into()
204            };
205            let inner_attr = if self.test_id.is_some() { "#[allow(non_snake_case)] " } else { "" };
206            let (main_pre, main_post) = if returns_result {
207                (
208                    format!(
209                        "fn main() {{ {inner_attr}fn {inner_fn_name}() -> Result<(), impl core::fmt::Debug> {{\n",
210                    ),
211                    format!("\n}} {inner_fn_name}().unwrap() }}"),
212                )
213            } else if self.test_id.is_some() {
214                (
215                    format!("fn main() {{ {inner_attr}fn {inner_fn_name}() {{\n",),
216                    format!("\n}} {inner_fn_name}() }}"),
217                )
218            } else {
219                ("fn main() {\n".into(), "\n}".into())
220            };
221            // Note on newlines: We insert a line/newline *before*, and *after*
222            // the doctest and adjust the `line_offset` accordingly.
223            // In the case of `-C instrument-coverage`, this means that the generated
224            // inner `main` function spans from the doctest opening codeblock to the
225            // closing one. For example
226            // /// ``` <- start of the inner main
227            // /// <- code under doctest
228            // /// ``` <- end of the inner main
229            line_offset += 1;
230
231            prog.push_str(&main_pre);
232
233            // add extra 4 spaces for each line to offset the code block
234            if opts.insert_indent_space {
235                prog.push_str(
236                    &everything_else
237                        .lines()
238                        .map(|line| format!("    {}", line))
239                        .collect::<Vec<String>>()
240                        .join("\n"),
241                );
242            } else {
243                prog.push_str(everything_else);
244            };
245            prog.push_str(&main_post);
246        }
247
248        debug!("final doctest:\n{prog}");
249
250        (prog, line_offset)
251    }
252}
253
/// Outcome of one `parse_source` run.
#[derive(Debug, PartialEq, Eq)]
enum ParsingResult {
    /// No parser could be created for the source.
    Failed,
    /// Parsing an item returned an error.
    AstError,
    /// Item parsing ended without an error being returned.
    Ok,
}
260
261fn cancel_error_count(psess: &ParseSess) {
262    // Reset errors so that they won't be reported as compiler bugs when dropping the
263    // dcx. Any errors in the tests will be reported when the test file is compiled,
264    // Note that we still need to cancel the errors above otherwise `Diag` will panic on
265    // drop.
266    psess.dcx().reset_err_count();
267}
268
269fn parse_source(
270    source: String,
271    info: &mut ParseSourceInfo,
272    crate_name: &Option<&str>,
273) -> ParsingResult {
274    use rustc_errors::DiagCtxt;
275    use rustc_errors::emitter::{Emitter, HumanEmitter};
276    use rustc_parse::parser::ForceCollect;
277    use rustc_span::source_map::FilePathMapping;
278
279    let filename = FileName::anon_source_code(&source);
280
281    // Any errors in parsing should also appear when the doctest is compiled for real, so just
282    // send all the errors that librustc_ast emits directly into a `Sink` instead of stderr.
283    let sm = Arc::new(SourceMap::new(FilePathMapping::empty()));
284    let fallback_bundle = rustc_errors::fallback_fluent_bundle(
285        rustc_driver::DEFAULT_LOCALE_RESOURCES.to_vec(),
286        false,
287    );
288    info.supports_color =
289        HumanEmitter::new(stderr_destination(ColorConfig::Auto), fallback_bundle.clone())
290            .supports_color();
291
292    let emitter = HumanEmitter::new(Box::new(io::sink()), fallback_bundle);
293
294    // FIXME(misdreavus): pass `-Z treat-err-as-bug` to the doctest parser
295    let dcx = DiagCtxt::new(Box::new(emitter)).disable_warnings();
296    let psess = ParseSess::with_dcx(dcx, sm);
297
298    let mut parser = match new_parser_from_source_str(&psess, filename, source) {
299        Ok(p) => p,
300        Err(errs) => {
301            errs.into_iter().for_each(|err| err.cancel());
302            cancel_error_count(&psess);
303            return ParsingResult::Failed;
304        }
305    };
306    let mut parsing_result = ParsingResult::Ok;
307
308    // Recurse through functions body. It is necessary because the doctest source code is
309    // wrapped in a function to limit the number of AST errors. If we don't recurse into
310    // functions, we would thing all top-level items (so basically nothing).
311    fn check_item(
312        item: &ast::Item,
313        info: &mut ParseSourceInfo,
314        crate_name: &Option<&str>,
315        is_top_level: bool,
316    ) {
317        if !info.has_global_allocator
318            && item.attrs.iter().any(|attr| attr.name_or_empty() == sym::global_allocator)
319        {
320            info.has_global_allocator = true;
321        }
322        match item.kind {
323            ast::ItemKind::Fn(ref fn_item) if !info.has_main_fn => {
324                if item.ident.name == sym::main && is_top_level {
325                    info.has_main_fn = true;
326                }
327                if let Some(ref body) = fn_item.body {
328                    for stmt in &body.stmts {
329                        match stmt.kind {
330                            ast::StmtKind::Item(ref item) => {
331                                check_item(item, info, crate_name, false)
332                            }
333                            ast::StmtKind::MacCall(..) => info.found_macro = true,
334                            _ => {}
335                        }
336                    }
337                }
338            }
339            ast::ItemKind::ExternCrate(original) => {
340                if !info.found_extern_crate
341                    && let Some(crate_name) = crate_name
342                {
343                    info.found_extern_crate = match original {
344                        Some(name) => name.as_str() == *crate_name,
345                        None => item.ident.as_str() == *crate_name,
346                    };
347                }
348            }
349            ast::ItemKind::MacCall(..) => info.found_macro = true,
350            ast::ItemKind::MacroDef(..) => info.has_macro_def = true,
351            _ => {}
352        }
353    }
354
355    loop {
356        match parser.parse_item(ForceCollect::No) {
357            Ok(Some(item)) => {
358                check_item(&item, info, crate_name, true);
359
360                if info.has_main_fn && info.found_extern_crate {
361                    break;
362                }
363            }
364            Ok(None) => break,
365            Err(e) => {
366                parsing_result = ParsingResult::AstError;
367                e.cancel();
368                break;
369            }
370        }
371
372        // The supplied item is only used for diagnostics,
373        // which are swallowed here anyway.
374        parser.maybe_consume_incorrect_semicolon(None);
375    }
376
377    cancel_error_count(&psess);
378    parsing_result
379}
380
/// Facts collected about a doctest while parsing it.
#[derive(Default)]
struct ParseSourceInfo {
    /// The doctest defines its own `main` function.
    has_main_fn: bool,
    /// An `extern crate` item for the documented crate was found (or none is needed because
    /// no crate name was given).
    found_extern_crate: bool,
    /// A macro invocation was seen, either as an item or as a statement in a function body.
    found_macro: bool,
    /// The stderr diagnostics emitter reported that it supports colors.
    supports_color: bool,
    /// An item carries the `#[global_allocator]` attribute.
    has_global_allocator: bool,
    /// A macro definition was seen.
    has_macro_def: bool,
}
390
391fn check_for_main_and_extern_crate(
392    crate_name: Option<&str>,
393    original_source_code: &str,
394    everything_else: &str,
395    crates: &str,
396    edition: Edition,
397    can_merge_doctests: bool,
398) -> Result<(ParseSourceInfo, bool), FatalError> {
399    let result = rustc_driver::catch_fatal_errors(|| {
400        rustc_span::create_session_if_not_set_then(edition, |_| {
401            let mut info =
402                ParseSourceInfo { found_extern_crate: crate_name.is_none(), ..Default::default() };
403
404            let mut parsing_result =
405                parse_source(format!("{crates}{everything_else}"), &mut info, &crate_name);
406            // No need to double-check this if the "merged doctests" feature isn't enabled (so
407            // before the 2024 edition).
408            if can_merge_doctests && parsing_result != ParsingResult::Ok {
409                // If we found an AST error, we want to ensure it's because of an expression being
410                // used outside of a function.
411                //
412                // To do so, we wrap in a function in order to make sure that the doctest AST is
413                // correct. For example, if your doctest is `foo::bar()`, if we don't wrap it in a
414                // block, it would emit an AST error, which would be problematic for us since we
415                // want to filter out such errors which aren't "real" errors.
416                //
417                // The end goal is to be able to merge as many doctests as possible as one for much
418                // faster doctests run time.
419                parsing_result = parse_source(
420                    format!("{crates}\nfn __doctest_wrap(){{{everything_else}\n}}"),
421                    &mut info,
422                    &crate_name,
423                );
424            }
425
426            (info, parsing_result)
427        })
428    });
429    let (mut info, parsing_result) = match result {
430        Err(..) | Ok((_, ParsingResult::Failed)) => return Err(FatalError),
431        Ok((info, parsing_result)) => (info, parsing_result),
432    };
433
434    // If a doctest's `fn main` is being masked by a wrapper macro, the parsing loop above won't
435    // see it. In that case, run the old text-based scan to see if they at least have a main
436    // function written inside a macro invocation. See
437    // https://github.com/rust-lang/rust/issues/56898
438    if info.found_macro
439        && !info.has_main_fn
440        && original_source_code
441            .lines()
442            .map(|line| {
443                let comment = line.find("//");
444                if let Some(comment_begins) = comment { &line[0..comment_begins] } else { line }
445            })
446            .any(|code| code.contains("fn main"))
447    {
448        info.has_main_fn = true;
449    }
450
451    Ok((info, parsing_result != ParsingResult::Ok))
452}
453
/// How a complete attribute found at the top of a doctest is classified.
enum AttrKind {
    /// Stored with the crate-level attributes (`crate_attrs`).
    CrateAttr,
    /// A lint-level attribute (`allow`, `warn`, `deny` or `forbid`), stored in
    /// `maybe_crate_attrs`.
    Attr,
}
458
459/// Returns `Some` if the attribute is complete and `Some(true)` if it is an attribute that can be
460/// placed at the crate root.
461fn check_if_attr_is_complete(source: &str, edition: Edition) -> Option<AttrKind> {
462    if source.is_empty() {
463        // Empty content so nothing to check in here...
464        return None;
465    }
466    let not_crate_attrs = [sym::forbid, sym::allow, sym::warn, sym::deny];
467
468    rustc_driver::catch_fatal_errors(|| {
469        rustc_span::create_session_if_not_set_then(edition, |_| {
470            use rustc_errors::DiagCtxt;
471            use rustc_errors::emitter::HumanEmitter;
472            use rustc_span::source_map::FilePathMapping;
473
474            let filename = FileName::anon_source_code(source);
475            // Any errors in parsing should also appear when the doctest is compiled for real, so just
476            // send all the errors that librustc_ast emits directly into a `Sink` instead of stderr.
477            let sm = Arc::new(SourceMap::new(FilePathMapping::empty()));
478            let fallback_bundle = rustc_errors::fallback_fluent_bundle(
479                rustc_driver::DEFAULT_LOCALE_RESOURCES.to_vec(),
480                false,
481            );
482
483            let emitter = HumanEmitter::new(Box::new(io::sink()), fallback_bundle);
484
485            let dcx = DiagCtxt::new(Box::new(emitter)).disable_warnings();
486            let psess = ParseSess::with_dcx(dcx, sm);
487            let mut parser = match new_parser_from_source_str(&psess, filename, source.to_owned()) {
488                Ok(p) => p,
489                Err(errs) => {
490                    errs.into_iter().for_each(|err| err.cancel());
491                    // If there is an unclosed delimiter, an error will be returned by the
492                    // tokentrees.
493                    return None;
494                }
495            };
496            // If a parsing error happened, it's very likely that the attribute is incomplete.
497            let ret = match parser.parse_attribute(InnerAttrPolicy::Permitted) {
498                Ok(attr) => {
499                    let attr_name = attr.name_or_empty();
500
501                    if not_crate_attrs.contains(&attr_name) {
502                        // There is one exception to these attributes:
503                        // `#![allow(internal_features)]`. If this attribute is used, we need to
504                        // consider it only as a crate-level attribute.
505                        if attr_name == sym::allow
506                            && let Some(list) = attr.meta_item_list()
507                            && list.iter().any(|sub_attr| {
508                                sub_attr.name_or_empty().as_str() == "internal_features"
509                            })
510                        {
511                            Some(AttrKind::CrateAttr)
512                        } else {
513                            Some(AttrKind::Attr)
514                        }
515                    } else {
516                        Some(AttrKind::CrateAttr)
517                    }
518                }
519                Err(e) => {
520                    e.cancel();
521                    None
522                }
523            };
524            ret
525        })
526    })
527    .unwrap_or(None)
528}
529
530fn handle_attr(mod_attr_pending: &mut String, source_info: &mut SourceInfo, edition: Edition) {
531    if let Some(attr_kind) = check_if_attr_is_complete(mod_attr_pending, edition) {
532        let push_to = match attr_kind {
533            AttrKind::CrateAttr => &mut source_info.crate_attrs,
534            AttrKind::Attr => &mut source_info.maybe_crate_attrs,
535        };
536        push_to.push_str(mod_attr_pending);
537        push_to.push('\n');
538        // If it's complete, then we can clear the pending content.
539        mod_attr_pending.clear();
540    } else {
541        mod_attr_pending.push('\n');
542    }
543}
544
/// The pieces a doctest source is split into by `partition_source`.
#[derive(Default)]
struct SourceInfo {
    /// Leading lines treated as crate-level attributes (blank lines and `//` comments found
    /// among them included).
    crate_attrs: String,
    /// Leading lint-level attributes (`allow`, `warn`, `deny`, `forbid`).
    maybe_crate_attrs: String,
    /// The `extern crate` lines (blank lines and `//` comments found among them included).
    crates: String,
    /// Everything that comes after the attributes and the `extern crate` lines.
    everything_else: String,
}
552
553fn partition_source(s: &str, edition: Edition) -> Option<SourceInfo> {
554    #[derive(Copy, Clone, PartialEq)]
555    enum PartitionState {
556        Attrs,
557        Crates,
558        Other,
559    }
560    let mut source_info = SourceInfo::default();
561    let mut state = PartitionState::Attrs;
562    let mut mod_attr_pending = String::new();
563
564    for line in s.lines() {
565        let trimline = line.trim();
566
567        // FIXME(misdreavus): if a doc comment is placed on an extern crate statement, it will be
568        // shunted into "everything else"
569        match state {
570            PartitionState::Attrs => {
571                state = if trimline.starts_with("#![") {
572                    mod_attr_pending = line.to_owned();
573                    handle_attr(&mut mod_attr_pending, &mut source_info, edition);
574                    continue;
575                } else if trimline.chars().all(|c| c.is_whitespace())
576                    || (trimline.starts_with("//") && !trimline.starts_with("///"))
577                {
578                    PartitionState::Attrs
579                } else if trimline.starts_with("extern crate")
580                    || trimline.starts_with("#[macro_use] extern crate")
581                {
582                    PartitionState::Crates
583                } else {
584                    // First we check if the previous attribute was "complete"...
585                    if !mod_attr_pending.is_empty() {
586                        // If not, then we append the new line into the pending attribute to check
587                        // if this time it's complete...
588                        mod_attr_pending.push_str(line);
589                        if !trimline.is_empty() {
590                            handle_attr(&mut mod_attr_pending, &mut source_info, edition);
591                        }
592                        continue;
593                    } else {
594                        PartitionState::Other
595                    }
596                };
597            }
598            PartitionState::Crates => {
599                state = if trimline.starts_with("extern crate")
600                    || trimline.starts_with("#[macro_use] extern crate")
601                    || trimline.chars().all(|c| c.is_whitespace())
602                    || (trimline.starts_with("//") && !trimline.starts_with("///"))
603                {
604                    PartitionState::Crates
605                } else {
606                    PartitionState::Other
607                };
608            }
609            PartitionState::Other => {}
610        }
611
612        match state {
613            PartitionState::Attrs => {
614                source_info.crate_attrs.push_str(line);
615                source_info.crate_attrs.push('\n');
616            }
617            PartitionState::Crates => {
618                source_info.crates.push_str(line);
619                source_info.crates.push('\n');
620            }
621            PartitionState::Other => {
622                source_info.everything_else.push_str(line);
623                source_info.everything_else.push('\n');
624            }
625        }
626    }
627
628    if !mod_attr_pending.is_empty() {
629        debug!("invalid doctest code: {s:?}");
630        return None;
631    }
632
633    source_info.everything_else = source_info.everything_else.trim().to_string();
634
635    debug!("crate_attrs:\n{}{}", source_info.crate_attrs, source_info.maybe_crate_attrs);
636    debug!("crates:\n{}", source_info.crates);
637    debug!("after:\n{}", source_info.everything_else);
638
639    Some(source_info)
640}