rustc_resolve/
rustdoc.rs

1use std::mem;
2use std::ops::Range;
3
4use itertools::Itertools;
5use pulldown_cmark::{
6    BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag,
7};
8use rustc_ast as ast;
9use rustc_ast::attr::AttributeExt;
10use rustc_ast::util::comments::beautify_doc_string;
11use rustc_data_structures::fx::FxIndexMap;
12use rustc_data_structures::unord::UnordSet;
13use rustc_middle::ty::TyCtxt;
14use rustc_span::def_id::DefId;
15use rustc_span::source_map::SourceMap;
16use rustc_span::{DUMMY_SP, InnerSpan, Span, Symbol, sym};
17use thin_vec::ThinVec;
18use tracing::{debug, trace};
19
20#[cfg(test)]
21mod tests;
22
/// Distinguishes the syntactic origin of a [`DocFragment`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DocFragmentKind {
    /// Documentation written with doc-comment sugar, i.e. `///` or `//!`.
    SugaredDoc,
    /// Documentation written as an explicit ("raw") `#[doc = "..."]` attribute.
    RawDoc,
}
30
31/// A portion of documentation, extracted from a `#[doc]` attribute.
32///
33/// Each variant contains the line number within the complete doc-comment where the fragment
34/// starts, as well as the Span where the corresponding doc comment or attribute is located.
35///
36/// Included files are kept separate from inline doc comments so that proper line-number
37/// information can be given when a doctest fails. Sugared doc comments and "raw" doc comments are
38/// kept separate because of issue #42760.
39#[derive(Clone, PartialEq, Eq, Debug)]
40pub struct DocFragment {
41    pub span: Span,
42    /// The item this doc-comment came from.
43    /// Used to determine the scope in which doc links in this fragment are resolved.
44    /// Typically filled for reexport docs when they are merged into the docs of the
45    /// original reexported item.
46    /// If the id is not filled, which happens for the original reexported item, then
47    /// it has to be taken from somewhere else during doc link resolution.
48    pub item_id: Option<DefId>,
49    pub doc: Symbol,
50    pub kind: DocFragmentKind,
51    pub indent: usize,
52}
53
/// The ways in which the generics of an intra-doc link path can be malformed.
#[derive(Debug, Clone, Copy)]
pub enum MalformedGenerics {
    /// The angle brackets of the link do not pair up.
    ///
    /// Both `Vec<T` and `Vec<T>>` are examples.
    UnbalancedAngleBrackets,
    /// There are generics, but no type they belong to.
    ///
    /// `<T>` is an example.
    ///
    /// Detection: once the generics have been stripped, the remaining path is empty.
    MissingType,
    /// The link is written in fully-qualified syntax, which is currently unsupported.
    ///
    /// `<Vec as IntoIterator>::into_iter` is an example.
    ///
    /// Detection: the text ` as ` (the keyword `as` surrounded by spaces) occurs inside angle
    /// brackets.
    HasFullyQualifiedSyntax,
    /// A path separator in the link is invalid.
    ///
    /// `Vec:<T>:new()` is an example. `Vec:new()`, on the other hand, does **not** trigger this:
    /// it has no generics, so [`strip_generics_from_path`] is never called for it.
    ///
    /// An invalid separator *inside* angle brackets does **not** trigger this either, because
    /// rustdoc mostly ignores what's inside angle brackets (except for
    /// [`HasFullyQualifiedSyntax`](MalformedGenerics::HasFullyQualifiedSyntax)).
    ///
    /// Detection: a colon is followed by something other than a colon.
    InvalidPathSeparator,
    /// An opening angle bracket is immediately followed by another one.
    ///
    /// `Vec<<T>>` is an example.
    TooManyAngleBrackets,
    /// A pair of angle brackets has nothing between them.
    ///
    /// `Vec<>` is an example.
    EmptyAngleBrackets,
}
94
95/// Removes excess indentation on comments in order for the Markdown
96/// to be parsed correctly. This is necessary because the convention for
97/// writing documentation is to provide a space between the /// or //! marker
98/// and the doc text, but Markdown is whitespace-sensitive. For example,
99/// a block of text with four-space indentation is parsed as a code block,
100/// so if we didn't unindent comments, these list items
101///
102/// /// A list:
103/// ///
104/// ///    - Foo
105/// ///    - Bar
106///
107/// would be parsed as if they were in a code block, which is likely not what the user intended.
108pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
109    // `add` is used in case the most common sugared doc syntax is used ("/// "). The other
110    // fragments kind's lines are never starting with a whitespace unless they are using some
111    // markdown formatting requiring it. Therefore, if the doc block have a mix between the two,
112    // we need to take into account the fact that the minimum indent minus one (to take this
113    // whitespace into account).
114    //
115    // For example:
116    //
117    // /// hello!
118    // #[doc = "another"]
119    //
120    // In this case, you want "hello! another" and not "hello!  another".
121    let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
122        && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
123    {
124        // In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
125        // "decide" how much the minimum indent will be.
126        1
127    } else {
128        0
129    };
130
131    // `min_indent` is used to know how much whitespaces from the start of each lines must be
132    // removed. Example:
133    //
134    // ///     hello!
135    // #[doc = "another"]
136    //
137    // In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
138    // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
139    // (5 - 1) whitespaces.
140    let Some(min_indent) = docs
141        .iter()
142        .map(|fragment| {
143            fragment
144                .doc
145                .as_str()
146                .lines()
147                .filter(|line| line.chars().any(|c| !c.is_whitespace()))
148                .map(|line| {
149                    // Compare against either space or tab, ignoring whether they are
150                    // mixed or not.
151                    let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
152                    whitespace
153                        + (if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add })
154                })
155                .min()
156                .unwrap_or(usize::MAX)
157        })
158        .min()
159    else {
160        return;
161    };
162
163    for fragment in docs {
164        if fragment.doc == sym::empty {
165            continue;
166        }
167
168        let indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
169            min_indent - add
170        } else {
171            min_indent
172        };
173
174        fragment.indent = indent;
175    }
176}
177
178/// The goal of this function is to apply the `DocFragment` transformation that is required when
179/// transforming into the final Markdown, which is applying the computed indent to each line in
180/// each doc fragment (a `DocFragment` can contain multiple lines in case of `#[doc = ""]`).
181///
182/// Note: remove the trailing newline where appropriate
183pub fn add_doc_fragment(out: &mut String, frag: &DocFragment) {
184    if frag.doc == sym::empty {
185        out.push('\n');
186        return;
187    }
188    let s = frag.doc.as_str();
189    let mut iter = s.lines();
190
191    while let Some(line) = iter.next() {
192        if line.chars().any(|c| !c.is_whitespace()) {
193            assert!(line.len() >= frag.indent);
194            out.push_str(&line[frag.indent..]);
195        } else {
196            out.push_str(line);
197        }
198        out.push('\n');
199    }
200}
201
202pub fn attrs_to_doc_fragments<'a, A: AttributeExt + Clone + 'a>(
203    attrs: impl Iterator<Item = (&'a A, Option<DefId>)>,
204    doc_only: bool,
205) -> (Vec<DocFragment>, ThinVec<A>) {
206    let mut doc_fragments = Vec::new();
207    let mut other_attrs = ThinVec::<A>::new();
208    for (attr, item_id) in attrs {
209        if let Some((doc_str, comment_kind)) = attr.doc_str_and_comment_kind() {
210            let doc = beautify_doc_string(doc_str, comment_kind);
211            let (span, kind) = if attr.is_doc_comment() {
212                (attr.span(), DocFragmentKind::SugaredDoc)
213            } else {
214                (
215                    attr.value_span()
216                        .map(|i| i.with_ctxt(attr.span().ctxt()))
217                        .unwrap_or(attr.span()),
218                    DocFragmentKind::RawDoc,
219                )
220            };
221            let fragment = DocFragment { span, doc, kind, item_id, indent: 0 };
222            doc_fragments.push(fragment);
223        } else if !doc_only {
224            other_attrs.push(attr.clone());
225        }
226    }
227
228    unindent_doc_fragments(&mut doc_fragments);
229
230    (doc_fragments, other_attrs)
231}
232
233/// Return the doc-comments on this item, grouped by the module they came from.
234/// The module can be different if this is a re-export with added documentation.
235///
236/// The last newline is not trimmed so the produced strings are reusable between
237/// early and late doc link resolution regardless of their position.
238pub fn prepare_to_doc_link_resolution(
239    doc_fragments: &[DocFragment],
240) -> FxIndexMap<Option<DefId>, String> {
241    let mut res = FxIndexMap::default();
242    for fragment in doc_fragments {
243        let out_str = res.entry(fragment.item_id).or_default();
244        add_doc_fragment(out_str, fragment);
245    }
246    res
247}
248
249/// Options for rendering Markdown in the main body of documentation.
250pub fn main_body_opts() -> Options {
251    Options::ENABLE_TABLES
252        | Options::ENABLE_FOOTNOTES
253        | Options::ENABLE_STRIKETHROUGH
254        | Options::ENABLE_TASKLISTS
255        | Options::ENABLE_SMART_PUNCTUATION
256}
257
258fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<String, MalformedGenerics> {
259    let mut stripped_segment = String::new();
260    let mut param_depth = 0;
261
262    let mut latest_generics_chunk = String::new();
263
264    for c in segment {
265        if c == '<' {
266            param_depth += 1;
267            latest_generics_chunk.clear();
268        } else if c == '>' {
269            param_depth -= 1;
270            if latest_generics_chunk.contains(" as ") {
271                // The segment tries to use fully-qualified syntax, which is currently unsupported.
272                // Give a helpful error message instead of completely ignoring the angle brackets.
273                return Err(MalformedGenerics::HasFullyQualifiedSyntax);
274            }
275        } else if param_depth == 0 {
276            stripped_segment.push(c);
277        } else {
278            latest_generics_chunk.push(c);
279        }
280    }
281
282    if param_depth == 0 {
283        Ok(stripped_segment)
284    } else {
285        // The segment has unbalanced angle brackets, e.g. `Vec<T` or `Vec<T>>`
286        Err(MalformedGenerics::UnbalancedAngleBrackets)
287    }
288}
289
290pub fn strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGenerics> {
291    if !path_str.contains(['<', '>']) {
292        return Ok(path_str.into());
293    }
294    let mut stripped_segments = vec![];
295    let mut path = path_str.chars().peekable();
296    let mut segment = Vec::new();
297
298    while let Some(chr) = path.next() {
299        match chr {
300            ':' => {
301                if path.next_if_eq(&':').is_some() {
302                    let stripped_segment =
303                        strip_generics_from_path_segment(mem::take(&mut segment))?;
304                    if !stripped_segment.is_empty() {
305                        stripped_segments.push(stripped_segment);
306                    }
307                } else {
308                    return Err(MalformedGenerics::InvalidPathSeparator);
309                }
310            }
311            '<' => {
312                segment.push(chr);
313
314                match path.next() {
315                    Some('<') => {
316                        return Err(MalformedGenerics::TooManyAngleBrackets);
317                    }
318                    Some('>') => {
319                        return Err(MalformedGenerics::EmptyAngleBrackets);
320                    }
321                    Some(chr) => {
322                        segment.push(chr);
323
324                        while let Some(chr) = path.next_if(|c| *c != '>') {
325                            segment.push(chr);
326                        }
327                    }
328                    None => break,
329                }
330            }
331            _ => segment.push(chr),
332        }
333        trace!("raw segment: {:?}", segment);
334    }
335
336    if !segment.is_empty() {
337        let stripped_segment = strip_generics_from_path_segment(segment)?;
338        if !stripped_segment.is_empty() {
339            stripped_segments.push(stripped_segment);
340        }
341    }
342
343    debug!("path_str: {path_str:?}\nstripped segments: {stripped_segments:?}");
344
345    let stripped_path = stripped_segments.join("::");
346
347    if !stripped_path.is_empty() {
348        Ok(stripped_path.into())
349    } else {
350        Err(MalformedGenerics::MissingType)
351    }
352}
353
354/// Returns whether the first doc-comment is an inner attribute.
355///
356/// If there are no doc-comments, return true.
357/// FIXME(#78591): Support both inner and outer attributes on the same item.
358pub fn inner_docs(attrs: &[impl AttributeExt]) -> bool {
359    attrs.iter().find(|a| a.doc_str().is_some()).is_none_or(|a| a.style() == ast::AttrStyle::Inner)
360}
361
362/// Has `#[rustc_doc_primitive]` or `#[doc(keyword)]`.
363pub fn has_primitive_or_keyword_docs(attrs: &[impl AttributeExt]) -> bool {
364    for attr in attrs {
365        if attr.has_name(sym::rustc_doc_primitive) {
366            return true;
367        } else if attr.has_name(sym::doc)
368            && let Some(items) = attr.meta_item_list()
369        {
370            for item in items {
371                if item.has_name(sym::keyword) {
372                    return true;
373                }
374            }
375        }
376    }
377    false
378}
379
380/// Simplified version of the corresponding function in rustdoc.
381/// If the rustdoc version returns a successful result, this function must return the same result.
382/// Otherwise this function may return anything.
383fn preprocess_link(link: &str) -> Box<str> {
384    let link = link.replace('`', "");
385    let link = link.split('#').next().unwrap();
386    let link = link.trim();
387    let link = link.rsplit('@').next().unwrap();
388    let link = link.strip_suffix("()").unwrap_or(link);
389    let link = link.strip_suffix("{}").unwrap_or(link);
390    let link = link.strip_suffix("[]").unwrap_or(link);
391    let link = if link != "!" { link.strip_suffix('!').unwrap_or(link) } else { link };
392    let link = link.trim();
393    strip_generics_from_path(link).unwrap_or_else(|_| link.into())
394}
395
396/// Keep inline and reference links `[]`,
397/// but skip autolinks `<>` which we never consider to be intra-doc links.
398pub fn may_be_doc_link(link_type: LinkType) -> bool {
399    match link_type {
400        LinkType::Inline
401        | LinkType::Reference
402        | LinkType::ReferenceUnknown
403        | LinkType::Collapsed
404        | LinkType::CollapsedUnknown
405        | LinkType::Shortcut
406        | LinkType::ShortcutUnknown => true,
407        LinkType::Autolink | LinkType::Email => false,
408    }
409}
410
411/// Simplified version of `preprocessed_markdown_links` from rustdoc.
412/// Must return at least the same links as it, but may add some more links on top of that.
413pub(crate) fn attrs_to_preprocessed_links<A: AttributeExt + Clone>(attrs: &[A]) -> Vec<Box<str>> {
414    let (doc_fragments, _) = attrs_to_doc_fragments(attrs.iter().map(|attr| (attr, None)), true);
415    let doc = prepare_to_doc_link_resolution(&doc_fragments).into_values().next().unwrap();
416
417    parse_links(&doc)
418}
419
420/// Similar version of `markdown_links` from rustdoc.
421/// This will collect destination links and display text if exists.
422fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
423    let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
424    let mut event_iter = Parser::new_with_broken_link_callback(
425        doc,
426        main_body_opts(),
427        Some(&mut broken_link_callback),
428    );
429    let mut links = Vec::new();
430
431    let mut refids = UnordSet::default();
432
433    while let Some(event) = event_iter.next() {
434        match event {
435            Event::Start(Tag::Link { link_type, dest_url, title: _, id })
436                if may_be_doc_link(link_type) =>
437            {
438                if matches!(
439                    link_type,
440                    LinkType::Inline
441                        | LinkType::ReferenceUnknown
442                        | LinkType::Reference
443                        | LinkType::Shortcut
444                        | LinkType::ShortcutUnknown
445                ) {
446                    if let Some(display_text) = collect_link_data(&mut event_iter) {
447                        links.push(display_text);
448                    }
449                }
450                if matches!(
451                    link_type,
452                    LinkType::Reference | LinkType::Shortcut | LinkType::Collapsed
453                ) {
454                    refids.insert(id);
455                }
456
457                links.push(preprocess_link(&dest_url));
458            }
459            _ => {}
460        }
461    }
462
463    for (label, refdef) in event_iter.reference_definitions().iter().sorted_by_key(|x| x.0) {
464        if !refids.contains(label) {
465            links.push(preprocess_link(&refdef.dest));
466        }
467    }
468
469    links
470}
471
472/// Collects additional data of link.
473fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
474    event_iter: &mut Parser<'input, F>,
475) -> Option<Box<str>> {
476    let mut display_text: Option<String> = None;
477    let mut append_text = |text: CowStr<'_>| {
478        if let Some(display_text) = &mut display_text {
479            display_text.push_str(&text);
480        } else {
481            display_text = Some(text.to_string());
482        }
483    };
484
485    while let Some(event) = event_iter.next() {
486        match event {
487            Event::Text(text) => {
488                append_text(text);
489            }
490            Event::Code(code) => {
491                append_text(code);
492            }
493            Event::End(_) => {
494                break;
495            }
496            _ => {}
497        }
498    }
499
500    display_text.map(String::into_boxed_str)
501}
502
503/// Returns a span encompassing all the document fragments.
504pub fn span_of_fragments(fragments: &[DocFragment]) -> Option<Span> {
505    if fragments.is_empty() {
506        return None;
507    }
508    let start = fragments[0].span;
509    if start == DUMMY_SP {
510        return None;
511    }
512    let end = fragments.last().expect("no doc strings provided").span;
513    Some(start.to(end))
514}
515
516/// Attempts to match a range of bytes from parsed markdown to a `Span` in the source code.
517///
518/// This method does not always work, because markdown bytes don't necessarily match source bytes,
519/// like if escapes are used in the string. In this case, it returns `None`.
520///
521/// `markdown` is typically the entire documentation for an item,
522/// after combining fragments.
523///
524/// This method will return `Some` only if one of the following is true:
525///
526/// - The doc is made entirely from sugared doc comments, which cannot contain escapes
527/// - The doc is entirely from a single doc fragment with a string literal exactly equal to `markdown`.
528/// - The doc comes from `include_str!`
529/// - The doc includes exactly one substring matching `markdown[md_range]` which is contained in a single doc fragment.
530///
531/// This function is defined in the compiler so it can be used by
532/// both `rustdoc` and `clippy`.
533pub fn source_span_for_markdown_range(
534    tcx: TyCtxt<'_>,
535    markdown: &str,
536    md_range: &Range<usize>,
537    fragments: &[DocFragment],
538) -> Option<Span> {
539    let map = tcx.sess.source_map();
540    source_span_for_markdown_range_inner(map, markdown, md_range, fragments)
541}
542
543// inner function used for unit testing
544pub fn source_span_for_markdown_range_inner(
545    map: &SourceMap,
546    markdown: &str,
547    md_range: &Range<usize>,
548    fragments: &[DocFragment],
549) -> Option<Span> {
550    use rustc_span::BytePos;
551
552    if let &[fragment] = &fragments
553        && fragment.kind == DocFragmentKind::RawDoc
554        && let Ok(snippet) = map.span_to_snippet(fragment.span)
555        && snippet.trim_end() == markdown.trim_end()
556        && let Ok(md_range_lo) = u32::try_from(md_range.start)
557        && let Ok(md_range_hi) = u32::try_from(md_range.end)
558    {
559        // Single fragment with string that contains same bytes as doc.
560        return Some(Span::new(
561            fragment.span.lo() + rustc_span::BytePos(md_range_lo),
562            fragment.span.lo() + rustc_span::BytePos(md_range_hi),
563            fragment.span.ctxt(),
564            fragment.span.parent(),
565        ));
566    }
567
568    let is_all_sugared_doc = fragments.iter().all(|frag| frag.kind == DocFragmentKind::SugaredDoc);
569
570    if !is_all_sugared_doc {
571        // This case ignores the markdown outside of the range so that it can
572        // work in cases where the markdown is made from several different
573        // doc fragments, but the target range does not span across multiple
574        // fragments.
575        let mut match_data = None;
576        let pat = &markdown[md_range.clone()];
577        // This heirustic doesn't make sense with a zero-sized range.
578        if pat.is_empty() {
579            return None;
580        }
581        for (i, fragment) in fragments.iter().enumerate() {
582            if let Ok(snippet) = map.span_to_snippet(fragment.span)
583                && let Some(match_start) = snippet.find(pat)
584            {
585                // If there is either a match in a previous fragment, or
586                // multiple matches in this fragment, there is ambiguity.
587                // the snippet cannot be zero-sized, because it matches
588                // the pattern, which is checked to not be zero sized.
589                if match_data.is_none()
590                    && !snippet.as_bytes()[match_start + 1..]
591                        .windows(pat.len())
592                        .any(|s| s == pat.as_bytes())
593                {
594                    match_data = Some((i, match_start));
595                } else {
596                    // Heirustic produced ambiguity, return nothing.
597                    return None;
598                }
599            }
600        }
601        if let Some((i, match_start)) = match_data {
602            let sp = fragments[i].span;
603            // we need to calculate the span start,
604            // then use that in our calulations for the span end
605            let lo = sp.lo() + BytePos(match_start as u32);
606            return Some(
607                sp.with_lo(lo).with_hi(lo + BytePos((md_range.end - md_range.start) as u32)),
608            );
609        }
610        return None;
611    }
612
613    let snippet = map.span_to_snippet(span_of_fragments(fragments)?).ok()?;
614
615    let starting_line = markdown[..md_range.start].matches('\n').count();
616    let ending_line = starting_line + markdown[md_range.start..md_range.end].matches('\n').count();
617
618    // We use `split_terminator('\n')` instead of `lines()` when counting bytes so that we treat
619    // CRLF and LF line endings the same way.
620    let mut src_lines = snippet.split_terminator('\n');
621    let md_lines = markdown.split_terminator('\n');
622
623    // The number of bytes from the source span to the markdown span that are not part
624    // of the markdown, like comment markers.
625    let mut start_bytes = 0;
626    let mut end_bytes = 0;
627
628    'outer: for (line_no, md_line) in md_lines.enumerate() {
629        loop {
630            let source_line = src_lines.next()?;
631            match source_line.find(md_line) {
632                Some(offset) => {
633                    if line_no == starting_line {
634                        start_bytes += offset;
635
636                        if starting_line == ending_line {
637                            break 'outer;
638                        }
639                    } else if line_no == ending_line {
640                        end_bytes += offset;
641                        break 'outer;
642                    } else if line_no < starting_line {
643                        start_bytes += source_line.len() - md_line.len();
644                    } else {
645                        end_bytes += source_line.len() - md_line.len();
646                    }
647                    break;
648                }
649                None => {
650                    // Since this is a source line that doesn't include a markdown line,
651                    // we have to count the newline that we split from earlier.
652                    if line_no <= starting_line {
653                        start_bytes += source_line.len() + 1;
654                    } else {
655                        end_bytes += source_line.len() + 1;
656                    }
657                }
658            }
659        }
660    }
661
662    Some(span_of_fragments(fragments)?.from_inner(InnerSpan::new(
663        md_range.start + start_bytes,
664        md_range.end + start_bytes + end_bytes,
665    )))
666}