// rustc_resolve/rustdoc.rs

use std::mem;
use std::ops::Range;

use itertools::Itertools;
/// Re-export the markdown parser used by rustdoc.
pub use pulldown_cmark;
use pulldown_cmark::{
    BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag,
};
use rustc_ast as ast;
use rustc_ast::attr::AttributeExt;
use rustc_ast::join_path_syms;
use rustc_ast::token::DocFragmentKind;
use rustc_ast::util::comments::beautify_doc_string;
use rustc_data_structures::fx::FxIndexMap;
use rustc_data_structures::unord::UnordSet;
use rustc_middle::ty::TyCtxt;
use rustc_span::def_id::DefId;
use rustc_span::source_map::SourceMap;
use rustc_span::{DUMMY_SP, InnerSpan, Span, Symbol, sym};
use thin_vec::ThinVec;
use tracing::{debug, trace};

#[cfg(test)]
mod tests;

/// A portion of documentation, extracted from a `#[doc]` attribute.
///
/// Each fragment records the `Span` where the corresponding doc comment or attribute is located.
///
/// Included files are kept separate from inline doc comments so that proper line-number
/// information can be given when a doctest fails. Sugared doc comments and "raw" doc comments are
/// kept separate because of issue #42760.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct DocFragment {
    pub span: Span,
    /// The item this doc-comment came from.
    /// Used to determine the scope in which doc links in this fragment are resolved.
    /// Typically filled for reexport docs when they are merged into the docs of the
    /// original reexported item.
    /// If the id is not filled, which happens for the original reexported item, then
    /// it has to be taken from somewhere else during doc link resolution.
    pub item_id: Option<DefId>,
    pub doc: Symbol,
    pub kind: DocFragmentKind,
    pub indent: usize,
    /// Because we tamper with the span's context, this information cannot be correctly retrieved
    /// later on. So instead, we compute it up front and store it here.
    pub from_expansion: bool,
}

#[derive(Clone, Copy, Debug)]
pub enum MalformedGenerics {
    /// This link has unbalanced angle brackets.
    ///
    /// For example, `Vec<T` should trigger this, as should `Vec<T>>`.
    UnbalancedAngleBrackets,
    /// The generics are not attached to a type.
    ///
    /// For example, `<T>` should trigger this.
    ///
    /// This is detected by checking if the path is empty after the generics are stripped.
    MissingType,
    /// The link uses fully-qualified syntax, which is currently unsupported.
    ///
    /// For example, `<Vec as IntoIterator>::into_iter` should trigger this.
    ///
    /// This is detected by checking if ` as ` (the keyword `as` with spaces around it) is inside
    /// angle brackets.
    HasFullyQualifiedSyntax,
    /// The link has an invalid path separator.
    ///
    /// For example, `Vec:<T>:new()` should trigger this. Note that `Vec:new()` will **not**
    /// trigger this because it has no generics and thus [`strip_generics_from_path`] will not be
    /// called.
    ///
    /// Note that this will also **not** be triggered if the invalid path separator is inside angle
    /// brackets because rustdoc mostly ignores what's inside angle brackets (except for
    /// [`HasFullyQualifiedSyntax`](MalformedGenerics::HasFullyQualifiedSyntax)).
    ///
    /// This is detected by checking if there is a colon followed by a non-colon in the link.
    InvalidPathSeparator,
    /// The link has too many angle brackets.
    ///
    /// For example, `Vec<<T>>` should trigger this.
    TooManyAngleBrackets,
    /// The link has empty angle brackets.
    ///
    /// For example, `Vec<>` should trigger this.
    EmptyAngleBrackets,
}

/// Removes excess indentation on comments in order for the Markdown
/// to be parsed correctly. This is necessary because the convention for
/// writing documentation is to provide a space between the /// or //! marker
/// and the doc text, but Markdown is whitespace-sensitive. For example,
/// a block of text with four-space indentation is parsed as a code block,
/// so if we didn't unindent comments, these list items
///
/// /// A list:
/// ///
/// ///    - Foo
/// ///    - Bar
///
/// would be parsed as if they were in a code block, which is likely not what the user intended.
pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
    // `add` accounts for the most common sugared doc syntax ("/// "), whose lines keep the one
    // space after the marker. Lines of the other fragment kinds never start with whitespace
    // unless some markdown formatting requires it. Therefore, if the doc block mixes the two
    // kinds, non-sugared fragments are counted with one extra whitespace so that the minimum
    // indent is computed consistently (that extra one is subtracted again when the indent is
    // applied below).
    //
    // For example:
    //
    // /// hello!
    // #[doc = "another"]
    //
    // In this case, you want "hello! another" and not "hello!  another".
    let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
        && docs.iter().any(|d| d.kind.is_sugared())
    {
        // In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
        // "decide" how much the minimum indent will be.
        1
    } else {
        0
    };

    // `min_indent` is how much whitespace must be removed from the start of each line. Example:
    //
    // ///     hello!
    // #[doc = "another"]
    //
    // Here, `min_indent` is 1 (because non-sugared fragments are always counted with at least
    // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
    // (5 - 1) whitespaces.
    let Some(min_indent) = docs
        .iter()
        .map(|fragment| {
            fragment
                .doc
                .as_str()
                .lines()
                .filter(|line| line.chars().any(|c| !c.is_whitespace()))
                .map(|line| {
                    // Compare against either space or tab, ignoring whether they are
                    // mixed or not.
                    let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
                    whitespace + (if fragment.kind.is_sugared() { 0 } else { add })
                })
                .min()
                .unwrap_or(usize::MAX)
        })
        .min()
    else {
        return;
    };

    for fragment in docs {
        if fragment.doc == sym::empty {
            continue;
        }

        let indent = if !fragment.kind.is_sugared() && min_indent > 0 {
            min_indent - add
        } else {
            min_indent
        };

        fragment.indent = indent;
    }
}

/// Applies to a `DocFragment` the transformation required to produce the final Markdown: the
/// computed indent is stripped from each line of the fragment and the result is appended to
/// `out` (a `DocFragment` can contain multiple lines in the case of `#[doc = ""]`).
///
/// Note: remove the trailing newline where appropriate.
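///
/// A minimal usage sketch (not a doctest; it relies on rustc-internal types and assumes the
/// fragments were already processed by [`unindent_doc_fragments`]):
///
/// ```ignore (rustc-internal)
/// let mut out = String::new();
/// for fragment in &fragments {
///     add_doc_fragment(&mut out, fragment);
/// }
/// // `out` now contains the combined, unindented Markdown of all fragments.
/// ```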
pub fn add_doc_fragment(out: &mut String, frag: &DocFragment) {
    if frag.doc == sym::empty {
        out.push('\n');
        return;
    }
    let s = frag.doc.as_str();
    let mut iter = s.lines();

    while let Some(line) = iter.next() {
        if line.chars().any(|c| !c.is_whitespace()) {
            assert!(line.len() >= frag.indent);
            out.push_str(&line[frag.indent..]);
        } else {
            out.push_str(line);
        }
        out.push('\n');
    }
}

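/// Extracts doc fragments from `attrs` and returns them together with the remaining non-doc
/// attributes. The returned fragments have their `indent` computed via
/// [`unindent_doc_fragments`]. If `doc_only` is true, the non-doc attributes are not collected.
///
/// A minimal usage sketch (not a doctest; `attrs` is assumed to be a slice of attributes):
///
/// ```ignore (rustc-internal)
/// let (fragments, other_attrs) = attrs_to_doc_fragments(attrs.iter().map(|a| (a, None)), false);
/// ```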
pub fn attrs_to_doc_fragments<'a, A: AttributeExt + Clone + 'a>(
    attrs: impl Iterator<Item = (&'a A, Option<DefId>)>,
    doc_only: bool,
) -> (Vec<DocFragment>, ThinVec<A>) {
    let (min_size, max_size) = attrs.size_hint();
    let size_hint = max_size.unwrap_or(min_size);
    let mut doc_fragments = Vec::with_capacity(size_hint);
    let mut other_attrs = ThinVec::<A>::with_capacity(if doc_only { 0 } else { size_hint });
    for (attr, item_id) in attrs {
        if let Some((doc_str, fragment_kind)) = attr.doc_str_and_fragment_kind() {
            let doc = beautify_doc_string(doc_str, fragment_kind.comment_kind());
            let attr_span = attr.span();
            let (span, from_expansion) = match fragment_kind {
                DocFragmentKind::Sugared(_) => (attr_span, attr_span.from_expansion()),
                DocFragmentKind::Raw(value_span) => {
                    (value_span.with_ctxt(attr_span.ctxt()), value_span.from_expansion())
                }
            };
            let fragment =
                DocFragment { span, doc, kind: fragment_kind, item_id, indent: 0, from_expansion };
            doc_fragments.push(fragment);
        } else if !doc_only {
            other_attrs.push(attr.clone());
        }
    }

    doc_fragments.shrink_to_fit();
    other_attrs.shrink_to_fit();

    unindent_doc_fragments(&mut doc_fragments);

    (doc_fragments, other_attrs)
}

/// Return the doc-comments on this item, grouped by the module they came from.
/// The module can be different if this is a re-export with added documentation.
///
/// The last newline is not trimmed so the produced strings are reusable between
/// early and late doc link resolution regardless of their position.
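///
/// A minimal usage sketch (not a doctest; relies on rustc-internal types):
///
/// ```ignore (rustc-internal)
/// let docs = prepare_to_doc_link_resolution(&doc_fragments);
/// for (parent_module, doc) in &docs {
///     // Resolve the doc links appearing in `doc` in the scope of `parent_module`.
/// }
/// ```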
pub fn prepare_to_doc_link_resolution(
    doc_fragments: &[DocFragment],
) -> FxIndexMap<Option<DefId>, String> {
    let mut res = FxIndexMap::default();
    for fragment in doc_fragments {
        let out_str = res.entry(fragment.item_id).or_default();
        add_doc_fragment(out_str, fragment);
    }
    res
}

/// Options for rendering Markdown in the main body of documentation.
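///
/// These options can be handed straight to the re-exported [`pulldown_cmark`] parser, e.g.
/// (sketch only, not a doctest):
///
/// ```ignore (rustc-internal)
/// let parser = pulldown_cmark::Parser::new_ext(doc, main_body_opts());
/// ```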
pub fn main_body_opts() -> Options {
    Options::ENABLE_TABLES
        | Options::ENABLE_FOOTNOTES
        | Options::ENABLE_STRIKETHROUGH
        | Options::ENABLE_TASKLISTS
        | Options::ENABLE_SMART_PUNCTUATION
}

fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<Symbol, MalformedGenerics> {
    let mut stripped_segment = String::new();
    let mut param_depth = 0;

    let mut latest_generics_chunk = String::new();

    for c in segment {
        if c == '<' {
            param_depth += 1;
            latest_generics_chunk.clear();
        } else if c == '>' {
            param_depth -= 1;
            if latest_generics_chunk.contains(" as ") {
                // The segment tries to use fully-qualified syntax, which is currently unsupported.
                // Give a helpful error message instead of completely ignoring the angle brackets.
                return Err(MalformedGenerics::HasFullyQualifiedSyntax);
            }
        } else if param_depth == 0 {
            stripped_segment.push(c);
        } else {
            latest_generics_chunk.push(c);
        }
    }

    if param_depth == 0 {
        Ok(Symbol::intern(&stripped_segment))
    } else {
        // The segment has unbalanced angle brackets, e.g. `Vec<T` or `Vec<T>>`
        Err(MalformedGenerics::UnbalancedAngleBrackets)
    }
}

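/// Strips generic arguments (e.g. `<T>`) from a path so that the remaining path can be resolved
/// as a doc link, returning a [`MalformedGenerics`] error if the generics are malformed.
///
/// A rough sketch of the intended behaviour (not a doctest; relies on rustc-internal types):
///
/// ```ignore (rustc-internal)
/// assert_eq!(&*strip_generics_from_path("Vec<T>::new").unwrap(), "Vec::new");
/// assert!(matches!(
///     strip_generics_from_path("Vec<T"),
///     Err(MalformedGenerics::UnbalancedAngleBrackets),
/// ));
/// ```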
pub fn strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGenerics> {
    if !path_str.contains(['<', '>']) {
        return Ok(path_str.into());
    }
    let mut stripped_segments = vec![];
    let mut path = path_str.chars().peekable();
    let mut segment = Vec::new();

    while let Some(chr) = path.next() {
        match chr {
            ':' => {
                if path.next_if_eq(&':').is_some() {
                    let stripped_segment =
                        strip_generics_from_path_segment(mem::take(&mut segment))?;
                    if !stripped_segment.is_empty() {
                        stripped_segments.push(stripped_segment);
                    }
                } else {
                    return Err(MalformedGenerics::InvalidPathSeparator);
                }
            }
            '<' => {
                segment.push(chr);

                match path.next() {
                    Some('<') => {
                        return Err(MalformedGenerics::TooManyAngleBrackets);
                    }
                    Some('>') => {
                        return Err(MalformedGenerics::EmptyAngleBrackets);
                    }
                    Some(chr) => {
                        segment.push(chr);

                        while let Some(chr) = path.next_if(|c| *c != '>') {
                            segment.push(chr);
                        }
                    }
                    None => break,
                }
            }
            _ => segment.push(chr),
        }
        trace!("raw segment: {:?}", segment);
    }

    if !segment.is_empty() {
        let stripped_segment = strip_generics_from_path_segment(segment)?;
        if !stripped_segment.is_empty() {
            stripped_segments.push(stripped_segment);
        }
    }

    debug!("path_str: {path_str:?}\nstripped segments: {stripped_segments:?}");

    if !stripped_segments.is_empty() {
        let stripped_path = join_path_syms(stripped_segments);
        Ok(stripped_path.into())
    } else {
        Err(MalformedGenerics::MissingType)
    }
}

/// Returns whether the first doc-comment is an inner attribute.
///
/// If there are no doc-comments, this returns `true`.
/// FIXME(#78591): Support both inner and outer attributes on the same item.
pub fn inner_docs(attrs: &[impl AttributeExt]) -> bool {
    for attr in attrs {
        if let Some(attr_style) = attr.doc_resolution_scope() {
            return attr_style == ast::AttrStyle::Inner;
        }
    }
    true
}

/// Has `#[rustc_doc_primitive]` or `#[doc(keyword)]` or `#[doc(attribute)]`.
pub fn has_primitive_or_keyword_or_attribute_docs(attrs: &[impl AttributeExt]) -> bool {
    for attr in attrs {
        if attr.has_name(sym::rustc_doc_primitive) || attr.is_doc_keyword_or_attribute() {
            return true;
        }
    }
    false
}

/// Simplified version of the corresponding function in rustdoc.
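///
/// Roughly, backticks, URL fragments, disambiguator prefixes, trailing `()`/`{}`/`[]`/`!`, and
/// generics are stripped so that only a resolvable path remains. A sketch of the expected
/// behaviour (not a doctest; relies on rustc-internal types):
///
/// ```ignore (rustc-internal)
/// assert_eq!(&*preprocess_link("`Vec::<T>::new()`"), "Vec::new");
/// assert_eq!(&*preprocess_link("struct@Foo#section"), "Foo");
/// ```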
fn preprocess_link(link: &str) -> Box<str> {
    // IMPORTANT: To be kept in sync with the corresponding function in rustdoc.
    // Namely, whenever the rustdoc function returns a successful result for a given input,
    // this function *MUST* return a link that's equal to `PreprocessingInfo.path_str`!

    let link = link.replace('`', "");
    let link = link.split('#').next().unwrap();
    let link = link.trim();
    let link = link.split_once('@').map_or(link, |(_, rhs)| rhs);
    let link = link.trim_suffix("()");
    let link = link.trim_suffix("{}");
    let link = link.trim_suffix("[]");
    let link = if link != "!" { link.trim_suffix('!') } else { link };
    let link = link.trim();
    strip_generics_from_path(link).unwrap_or_else(|_| link.into())
}

/// Keep inline and reference links `[]`,
/// but skip autolinks `<>` which we never consider to be intra-doc links.
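///
/// For instance (sketch only, not a doctest):
///
/// ```ignore (rustc-internal)
/// assert!(may_be_doc_link(LinkType::Shortcut));
/// assert!(!may_be_doc_link(LinkType::Autolink));
/// ```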
pub fn may_be_doc_link(link_type: LinkType) -> bool {
    match link_type {
        LinkType::Inline
        | LinkType::Reference
        | LinkType::ReferenceUnknown
        | LinkType::Collapsed
        | LinkType::CollapsedUnknown
        | LinkType::Shortcut
        | LinkType::ShortcutUnknown => true,
        LinkType::Autolink | LinkType::Email => false,
    }
}

/// Simplified version of `preprocessed_markdown_links` from rustdoc.
/// It must return at least the same links as the rustdoc version, but may also return additional
/// ones on top of that.
pub(crate) fn attrs_to_preprocessed_links<A: AttributeExt + Clone>(attrs: &[A]) -> Vec<Box<str>> {
    let (doc_fragments, _) = attrs_to_doc_fragments(attrs.iter().map(|attr| (attr, None)), true);
    let doc = prepare_to_doc_link_resolution(&doc_fragments).into_values().next().unwrap();

    parse_links(&doc)
}

/// Similar to `markdown_links` from rustdoc.
/// Collects destination links and, when present, the display text of each link.
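///
/// A rough sketch of the kind of output to expect (not a doctest; the exact list depends on the
/// Markdown parser, and both display text and destination are returned as candidates):
///
/// ```ignore (rustc-internal)
/// let links = parse_links("See [`Vec`] and [growable array](Vec).");
/// // Approximately: ["Vec", "Vec", "growable array", "Vec"]
/// ```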
fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
    let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
    let mut event_iter = Parser::new_with_broken_link_callback(
        doc,
        main_body_opts(),
        Some(&mut broken_link_callback),
    );
    let mut links = Vec::new();

    let mut refids = UnordSet::default();

    while let Some(event) = event_iter.next() {
        match event {
            Event::Start(Tag::Link { link_type, dest_url, title: _, id })
                if may_be_doc_link(link_type) =>
            {
                if matches!(
                    link_type,
                    LinkType::Inline
                        | LinkType::ReferenceUnknown
                        | LinkType::Reference
                        | LinkType::Shortcut
                        | LinkType::ShortcutUnknown
                ) {
                    if let Some(display_text) = collect_link_data(&mut event_iter) {
                        links.push(display_text);
                    }
                }
                if matches!(
                    link_type,
                    LinkType::Reference | LinkType::Shortcut | LinkType::Collapsed
                ) {
                    refids.insert(id);
                }

                links.push(preprocess_link(&dest_url));
            }
            _ => {}
        }
    }

    for (label, refdef) in event_iter.reference_definitions().iter().sorted_by_key(|x| x.0) {
        if !refids.contains(label) {
            links.push(preprocess_link(&refdef.dest));
        }
    }

    links
}

/// Collects the display text of a link, if any.
fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
    event_iter: &mut Parser<'input, F>,
) -> Option<Box<str>> {
    let mut display_text: Option<String> = None;
    let mut append_text = |text: CowStr<'_>| {
        if let Some(display_text) = &mut display_text {
            display_text.push_str(&text);
        } else {
            display_text = Some(text.to_string());
        }
    };

    while let Some(event) = event_iter.next() {
        match event {
            Event::Text(text) => {
                append_text(text);
            }
            Event::Code(code) => {
                append_text(code);
            }
            Event::End(_) => {
                break;
            }
            _ => {}
        }
    }

    display_text.map(String::into_boxed_str)
}

/// Returns a span encompassing all the document fragments.
pub fn span_of_fragments(fragments: &[DocFragment]) -> Option<Span> {
    let (first_fragment, last_fragment) = match fragments {
        [] => return None,
        [first, .., last] => (first, last),
        [first] => (first, first),
    };
    if first_fragment.span == DUMMY_SP {
        return None;
    }
    Some(first_fragment.span.to(last_fragment.span))
}

/// Attempts to match a range of bytes from parsed markdown to a `Span` in the source code.
///
/// This method does not always work, because markdown bytes don't necessarily match source bytes,
/// like if escapes are used in the string. In this case, it returns `None`.
///
/// `markdown` is typically the entire documentation for an item,
/// after combining fragments.
///
/// This method will return `Some` only if one of the following is true:
///
/// - The doc is made entirely from sugared doc comments, which cannot contain escapes
/// - The doc is entirely from a single doc fragment with a string literal exactly equal to
///   `markdown`.
/// - The doc comes from `include_str!`
/// - The doc includes exactly one substring matching `markdown[md_range]` which is contained in a
///   single doc fragment.
///
/// This function is defined in the compiler so it can be used by both `rustdoc` and `clippy`.
///
/// On success, it returns the source span corresponding to `markdown[md_range]` and a boolean that
/// is `true` if any of the *matched* fragments are from a macro expansion.
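///
/// A minimal usage sketch (not a doctest; `tcx`, `doc`, and `fragments` are assumed to come from
/// the surrounding lint or rustdoc pass):
///
/// ```ignore (rustc-internal)
/// if let Some((span, from_expansion)) =
///     source_span_for_markdown_range(tcx, &doc, &(start..end), &fragments)
///     && !from_expansion
/// {
///     // Emit a suggestion at `span`.
/// }
/// ```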
pub fn source_span_for_markdown_range(
    tcx: TyCtxt<'_>,
    markdown: &str,
    md_range: &Range<usize>,
    fragments: &[DocFragment],
) -> Option<(Span, bool)> {
    let map = tcx.sess.source_map();
    source_span_for_markdown_range_inner(map, markdown, md_range, fragments)
}

// Inner function used for unit testing.
pub fn source_span_for_markdown_range_inner(
    map: &SourceMap,
    markdown: &str,
    md_range: &Range<usize>,
    fragments: &[DocFragment],
) -> Option<(Span, bool)> {
    use rustc_span::BytePos;

    if let &[fragment] = &fragments
        && !fragment.kind.is_sugared()
        && let Ok(snippet) = map.span_to_snippet(fragment.span)
        && snippet.trim_end() == markdown.trim_end()
        && let Ok(md_range_lo) = u32::try_from(md_range.start)
        && let Ok(md_range_hi) = u32::try_from(md_range.end)
    {
        // Single fragment with string that contains same bytes as doc.
        return Some((
            Span::new(
                fragment.span.lo() + rustc_span::BytePos(md_range_lo),
                fragment.span.lo() + rustc_span::BytePos(md_range_hi),
                fragment.span.ctxt(),
                fragment.span.parent(),
            ),
            fragment.from_expansion,
        ));
    }

    let is_all_sugared_doc = fragments.iter().all(|frag| frag.kind.is_sugared());

    if !is_all_sugared_doc {
        // This case ignores the markdown outside of the range so that it can
        // work in cases where the markdown is made from several different
        // doc fragments, but the target range does not span across multiple
        // fragments.
        let mut match_data = None;
        let pat = &markdown[md_range.clone()];
        // This heuristic doesn't make sense with a zero-sized range.
        if pat.is_empty() {
            return None;
        }
        for (i, fragment) in fragments.iter().enumerate() {
            if let Ok(snippet) = map.span_to_snippet(fragment.span)
                && let Some(match_start) = snippet.find(pat)
            {
                // If there is either a match in a previous fragment, or
                // multiple matches in this fragment, there is ambiguity.
                // The snippet cannot be zero-sized, because it matches
                // the pattern, which is checked to not be zero-sized.
                if match_data.is_none()
                    && !snippet.as_bytes()[match_start + 1..]
                        .windows(pat.len())
                        .any(|s| s == pat.as_bytes())
                {
                    match_data = Some((i, match_start));
                } else {
                    // Heuristic produced ambiguity, return nothing.
                    return None;
                }
            }
        }
        if let Some((i, match_start)) = match_data {
            let fragment = &fragments[i];
            let sp = fragment.span;
            // We need to calculate the span start,
            // then use that in our calculations for the span end.
            let lo = sp.lo() + BytePos(match_start as u32);
            return Some((
                sp.with_lo(lo).with_hi(lo + BytePos((md_range.end - md_range.start) as u32)),
                fragment.from_expansion,
            ));
        }
        return None;
    }

    let snippet = map.span_to_snippet(span_of_fragments(fragments)?).ok()?;

    let starting_line = markdown[..md_range.start].matches('\n').count();
    let ending_line = starting_line + markdown[md_range.start..md_range.end].matches('\n').count();

    // We use `split_terminator('\n')` instead of `lines()` when counting bytes so that we treat
    // CRLF and LF line endings the same way.
    let mut src_lines = snippet.split_terminator('\n');
    let md_lines = markdown.split_terminator('\n');

    // The number of bytes from the source span to the markdown span that are not part
    // of the markdown, like comment markers.
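    //
    // For example, with sugared doc comments each source line typically starts with `/// `
    // (4 bytes) that never appears in the markdown, so every full source line before the start
    // of the range contributes those 4 extra bytes to `start_bytes`.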
    let mut start_bytes = 0;
    let mut end_bytes = 0;

    'outer: for (line_no, md_line) in md_lines.enumerate() {
        loop {
            let source_line = src_lines.next()?;
            match source_line.find(md_line) {
                Some(offset) => {
                    if line_no == starting_line {
                        start_bytes += offset;

                        if starting_line == ending_line {
                            break 'outer;
                        }
                    } else if line_no == ending_line {
                        end_bytes += offset;
                        break 'outer;
                    } else if line_no < starting_line {
                        start_bytes += source_line.len() - md_line.len();
                    } else {
                        end_bytes += source_line.len() - md_line.len();
                    }
                    break;
                }
                None => {
                    // Since this is a source line that doesn't include a markdown line,
                    // we have to count the newline that we split from earlier.
                    if line_no <= starting_line {
                        start_bytes += source_line.len() + 1;
                    } else {
                        end_bytes += source_line.len() + 1;
                    }
                }
            }
        }
    }

    let span = span_of_fragments(fragments)?;
    let src_span = span.from_inner(InnerSpan::new(
        md_range.start + start_bytes,
        md_range.end + start_bytes + end_bytes,
    ));
    Some((
        src_span,
        fragments.iter().any(|frag| frag.span.overlaps(src_span) && frag.from_expansion),
    ))
}