rustc_resolve/
rustdoc.rs

1use std::mem;
2use std::ops::Range;
3
4use pulldown_cmark::{
5    BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag,
6};
7use rustc_ast as ast;
8use rustc_ast::attr::AttributeExt;
9use rustc_ast::util::comments::beautify_doc_string;
10use rustc_data_structures::fx::FxIndexMap;
11use rustc_middle::ty::TyCtxt;
12use rustc_span::def_id::DefId;
13use rustc_span::{DUMMY_SP, InnerSpan, Span, Symbol, kw, sym};
14use thin_vec::ThinVec;
15use tracing::{debug, trace};
16
/// How a [`DocFragment`] was written in the source code.
///
/// The kind matters when computing indentation: sugared fragments are the ones that
/// conventionally carry one extra leading space after the comment marker.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum DocFragmentKind {
    /// A doc fragment created from a `///` or `//!` doc comment.
    SugaredDoc,
    /// A doc fragment created from a "raw" `#[doc=""]` attribute.
    RawDoc,
}
24
/// A portion of documentation, extracted from a doc comment or a `#[doc]` attribute.
///
/// Each fragment records the `Span` where the corresponding doc comment or attribute is
/// located, the text it contributes, and the indentation that has to be stripped from that
/// text when the final Markdown is assembled.
///
/// Sugared doc comments and "raw" doc comments are kept separate because of issue #42760.
///
/// NOTE(review): an earlier wording of this comment also said that included files are kept
/// separate from inline doc comments for doctest line numbers; no such distinction is visible
/// in `DocFragmentKind` here — confirm whether that still applies.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct DocFragment {
    /// Where this fragment is located in the source: the whole attribute for sugared doc
    /// comments, the attribute value (when available) for raw `#[doc = "..."]` attributes.
    pub span: Span,
    /// The item this doc-comment came from.
    /// Used to determine the scope in which doc links in this fragment are resolved.
    /// Typically filled for reexport docs when they are merged into the docs of the
    /// original reexported item.
    /// If the id is not filled, which happens for the original reexported item, then
    /// it has to be taken from somewhere else during doc link resolution.
    pub item_id: Option<DefId>,
    /// The text of this fragment, after it went through `beautify_doc_string`.
    pub doc: Symbol,
    /// Whether the fragment comes from a sugared doc comment or a raw `#[doc]` attribute.
    pub kind: DocFragmentKind,
    /// Number of leading bytes dropped from every non-blank line of `doc` by
    /// `add_doc_fragment`. Starts at 0 and is computed by `unindent_doc_fragments`.
    pub indent: usize,
}
47
/// The ways in which the generics of a doc link path can be malformed.
///
/// This is the error type returned by [`strip_generics_from_path`].
#[derive(Clone, Copy, Debug)]
pub enum MalformedGenerics {
    /// This link has unbalanced angle brackets.
    ///
    /// For example, `Vec<T` should trigger this, as should `Vec<T>>`.
    UnbalancedAngleBrackets,
    /// The generics are not attached to a type.
    ///
    /// For example, `<T>` should trigger this.
    ///
    /// This is detected by checking if the path is empty after the generics are stripped.
    MissingType,
    /// The link uses fully-qualified syntax, which is currently unsupported.
    ///
    /// For example, `<Vec as IntoIterator>::into_iter` should trigger this.
    ///
    /// This is detected by checking if ` as ` (the keyword `as` with spaces around it) is inside
    /// angle brackets.
    HasFullyQualifiedSyntax,
    /// The link has an invalid path separator.
    ///
    /// For example, `Vec:<T>:new()` should trigger this. Note that `Vec:new()` will **not**
    /// trigger this because it has no generics and thus [`strip_generics_from_path`] will not be
    /// called.
    ///
    /// Note that this will also **not** be triggered if the invalid path separator is inside angle
    /// brackets because rustdoc mostly ignores what's inside angle brackets (except for
    /// [`HasFullyQualifiedSyntax`](MalformedGenerics::HasFullyQualifiedSyntax)).
    ///
    /// This is detected by checking if there is a colon followed by a non-colon in the link.
    InvalidPathSeparator,
    /// The link has too many angle brackets.
    ///
    /// For example, `Vec<<T>>` should trigger this.
    ///
    /// This is detected by checking if a `<` is directly followed by another `<`.
    TooManyAngleBrackets,
    /// The link has empty angle brackets.
    ///
    /// For example, `Vec<>` should trigger this.
    ///
    /// This is detected by checking if a `<` is directly followed by a `>`.
    EmptyAngleBrackets,
}
88
89/// Removes excess indentation on comments in order for the Markdown
90/// to be parsed correctly. This is necessary because the convention for
91/// writing documentation is to provide a space between the /// or //! marker
92/// and the doc text, but Markdown is whitespace-sensitive. For example,
93/// a block of text with four-space indentation is parsed as a code block,
94/// so if we didn't unindent comments, these list items
95///
96/// /// A list:
97/// ///
98/// ///    - Foo
99/// ///    - Bar
100///
101/// would be parsed as if they were in a code block, which is likely not what the user intended.
102pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
103    // `add` is used in case the most common sugared doc syntax is used ("/// "). The other
104    // fragments kind's lines are never starting with a whitespace unless they are using some
105    // markdown formatting requiring it. Therefore, if the doc block have a mix between the two,
106    // we need to take into account the fact that the minimum indent minus one (to take this
107    // whitespace into account).
108    //
109    // For example:
110    //
111    // /// hello!
112    // #[doc = "another"]
113    //
114    // In this case, you want "hello! another" and not "hello!  another".
115    let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
116        && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
117    {
118        // In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
119        // "decide" how much the minimum indent will be.
120        1
121    } else {
122        0
123    };
124
125    // `min_indent` is used to know how much whitespaces from the start of each lines must be
126    // removed. Example:
127    //
128    // ///     hello!
129    // #[doc = "another"]
130    //
131    // In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
132    // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
133    // (5 - 1) whitespaces.
134    let Some(min_indent) = docs
135        .iter()
136        .map(|fragment| {
137            fragment
138                .doc
139                .as_str()
140                .lines()
141                .filter(|line| line.chars().any(|c| !c.is_whitespace()))
142                .map(|line| {
143                    // Compare against either space or tab, ignoring whether they are
144                    // mixed or not.
145                    let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
146                    whitespace
147                        + (if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add })
148                })
149                .min()
150                .unwrap_or(usize::MAX)
151        })
152        .min()
153    else {
154        return;
155    };
156
157    for fragment in docs {
158        if fragment.doc == kw::Empty {
159            continue;
160        }
161
162        let indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
163            min_indent - add
164        } else {
165            min_indent
166        };
167
168        fragment.indent = indent;
169    }
170}
171
172/// The goal of this function is to apply the `DocFragment` transformation that is required when
173/// transforming into the final Markdown, which is applying the computed indent to each line in
174/// each doc fragment (a `DocFragment` can contain multiple lines in case of `#[doc = ""]`).
175///
176/// Note: remove the trailing newline where appropriate
177pub fn add_doc_fragment(out: &mut String, frag: &DocFragment) {
178    if frag.doc == kw::Empty {
179        out.push('\n');
180        return;
181    }
182    let s = frag.doc.as_str();
183    let mut iter = s.lines();
184
185    while let Some(line) = iter.next() {
186        if line.chars().any(|c| !c.is_whitespace()) {
187            assert!(line.len() >= frag.indent);
188            out.push_str(&line[frag.indent..]);
189        } else {
190            out.push_str(line);
191        }
192        out.push('\n');
193    }
194}
195
196pub fn attrs_to_doc_fragments<'a, A: AttributeExt + Clone + 'a>(
197    attrs: impl Iterator<Item = (&'a A, Option<DefId>)>,
198    doc_only: bool,
199) -> (Vec<DocFragment>, ThinVec<A>) {
200    let mut doc_fragments = Vec::new();
201    let mut other_attrs = ThinVec::<A>::new();
202    for (attr, item_id) in attrs {
203        if let Some((doc_str, comment_kind)) = attr.doc_str_and_comment_kind() {
204            let doc = beautify_doc_string(doc_str, comment_kind);
205            let (span, kind) = if attr.is_doc_comment() {
206                (attr.span(), DocFragmentKind::SugaredDoc)
207            } else {
208                (
209                    attr.value_span()
210                        .map(|i| i.with_ctxt(attr.span().ctxt()))
211                        .unwrap_or(attr.span()),
212                    DocFragmentKind::RawDoc,
213                )
214            };
215            let fragment = DocFragment { span, doc, kind, item_id, indent: 0 };
216            doc_fragments.push(fragment);
217        } else if !doc_only {
218            other_attrs.push(attr.clone());
219        }
220    }
221
222    unindent_doc_fragments(&mut doc_fragments);
223
224    (doc_fragments, other_attrs)
225}
226
227/// Return the doc-comments on this item, grouped by the module they came from.
228/// The module can be different if this is a re-export with added documentation.
229///
230/// The last newline is not trimmed so the produced strings are reusable between
231/// early and late doc link resolution regardless of their position.
232pub fn prepare_to_doc_link_resolution(
233    doc_fragments: &[DocFragment],
234) -> FxIndexMap<Option<DefId>, String> {
235    let mut res = FxIndexMap::default();
236    for fragment in doc_fragments {
237        let out_str = res.entry(fragment.item_id).or_default();
238        add_doc_fragment(out_str, fragment);
239    }
240    res
241}
242
243/// Options for rendering Markdown in the main body of documentation.
244pub fn main_body_opts() -> Options {
245    Options::ENABLE_TABLES
246        | Options::ENABLE_FOOTNOTES
247        | Options::ENABLE_STRIKETHROUGH
248        | Options::ENABLE_TASKLISTS
249        | Options::ENABLE_SMART_PUNCTUATION
250}
251
252fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<String, MalformedGenerics> {
253    let mut stripped_segment = String::new();
254    let mut param_depth = 0;
255
256    let mut latest_generics_chunk = String::new();
257
258    for c in segment {
259        if c == '<' {
260            param_depth += 1;
261            latest_generics_chunk.clear();
262        } else if c == '>' {
263            param_depth -= 1;
264            if latest_generics_chunk.contains(" as ") {
265                // The segment tries to use fully-qualified syntax, which is currently unsupported.
266                // Give a helpful error message instead of completely ignoring the angle brackets.
267                return Err(MalformedGenerics::HasFullyQualifiedSyntax);
268            }
269        } else if param_depth == 0 {
270            stripped_segment.push(c);
271        } else {
272            latest_generics_chunk.push(c);
273        }
274    }
275
276    if param_depth == 0 {
277        Ok(stripped_segment)
278    } else {
279        // The segment has unbalanced angle brackets, e.g. `Vec<T` or `Vec<T>>`
280        Err(MalformedGenerics::UnbalancedAngleBrackets)
281    }
282}
283
284pub fn strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGenerics> {
285    if !path_str.contains(['<', '>']) {
286        return Ok(path_str.into());
287    }
288    let mut stripped_segments = vec![];
289    let mut path = path_str.chars().peekable();
290    let mut segment = Vec::new();
291
292    while let Some(chr) = path.next() {
293        match chr {
294            ':' => {
295                if path.next_if_eq(&':').is_some() {
296                    let stripped_segment =
297                        strip_generics_from_path_segment(mem::take(&mut segment))?;
298                    if !stripped_segment.is_empty() {
299                        stripped_segments.push(stripped_segment);
300                    }
301                } else {
302                    return Err(MalformedGenerics::InvalidPathSeparator);
303                }
304            }
305            '<' => {
306                segment.push(chr);
307
308                match path.next() {
309                    Some('<') => {
310                        return Err(MalformedGenerics::TooManyAngleBrackets);
311                    }
312                    Some('>') => {
313                        return Err(MalformedGenerics::EmptyAngleBrackets);
314                    }
315                    Some(chr) => {
316                        segment.push(chr);
317
318                        while let Some(chr) = path.next_if(|c| *c != '>') {
319                            segment.push(chr);
320                        }
321                    }
322                    None => break,
323                }
324            }
325            _ => segment.push(chr),
326        }
327        trace!("raw segment: {:?}", segment);
328    }
329
330    if !segment.is_empty() {
331        let stripped_segment = strip_generics_from_path_segment(segment)?;
332        if !stripped_segment.is_empty() {
333            stripped_segments.push(stripped_segment);
334        }
335    }
336
337    debug!("path_str: {path_str:?}\nstripped segments: {stripped_segments:?}");
338
339    let stripped_path = stripped_segments.join("::");
340
341    if !stripped_path.is_empty() {
342        Ok(stripped_path.into())
343    } else {
344        Err(MalformedGenerics::MissingType)
345    }
346}
347
348/// Returns whether the first doc-comment is an inner attribute.
349///
350/// If there are no doc-comments, return true.
351/// FIXME(#78591): Support both inner and outer attributes on the same item.
352pub fn inner_docs(attrs: &[impl AttributeExt]) -> bool {
353    attrs.iter().find(|a| a.doc_str().is_some()).is_none_or(|a| a.style() == ast::AttrStyle::Inner)
354}
355
356/// Has `#[rustc_doc_primitive]` or `#[doc(keyword)]`.
357pub fn has_primitive_or_keyword_docs(attrs: &[impl AttributeExt]) -> bool {
358    for attr in attrs {
359        if attr.has_name(sym::rustc_doc_primitive) {
360            return true;
361        } else if attr.has_name(sym::doc)
362            && let Some(items) = attr.meta_item_list()
363        {
364            for item in items {
365                if item.has_name(sym::keyword) {
366                    return true;
367                }
368            }
369        }
370    }
371    false
372}
373
374/// Simplified version of the corresponding function in rustdoc.
375/// If the rustdoc version returns a successful result, this function must return the same result.
376/// Otherwise this function may return anything.
377fn preprocess_link(link: &str) -> Box<str> {
378    let link = link.replace('`', "");
379    let link = link.split('#').next().unwrap();
380    let link = link.trim();
381    let link = link.rsplit('@').next().unwrap();
382    let link = link.strip_suffix("()").unwrap_or(link);
383    let link = link.strip_suffix("{}").unwrap_or(link);
384    let link = link.strip_suffix("[]").unwrap_or(link);
385    let link = if link != "!" { link.strip_suffix('!').unwrap_or(link) } else { link };
386    let link = link.trim();
387    strip_generics_from_path(link).unwrap_or_else(|_| link.into())
388}
389
390/// Keep inline and reference links `[]`,
391/// but skip autolinks `<>` which we never consider to be intra-doc links.
392pub fn may_be_doc_link(link_type: LinkType) -> bool {
393    match link_type {
394        LinkType::Inline
395        | LinkType::Reference
396        | LinkType::ReferenceUnknown
397        | LinkType::Collapsed
398        | LinkType::CollapsedUnknown
399        | LinkType::Shortcut
400        | LinkType::ShortcutUnknown => true,
401        LinkType::Autolink | LinkType::Email => false,
402    }
403}
404
405/// Simplified version of `preprocessed_markdown_links` from rustdoc.
406/// Must return at least the same links as it, but may add some more links on top of that.
407pub(crate) fn attrs_to_preprocessed_links<A: AttributeExt + Clone>(attrs: &[A]) -> Vec<Box<str>> {
408    let (doc_fragments, _) = attrs_to_doc_fragments(attrs.iter().map(|attr| (attr, None)), true);
409    let doc = prepare_to_doc_link_resolution(&doc_fragments).into_values().next().unwrap();
410
411    parse_links(&doc)
412}
413
414/// Similar version of `markdown_links` from rustdoc.
415/// This will collect destination links and display text if exists.
416fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
417    let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
418    let mut event_iter = Parser::new_with_broken_link_callback(
419        doc,
420        main_body_opts(),
421        Some(&mut broken_link_callback),
422    );
423    let mut links = Vec::new();
424
425    while let Some(event) = event_iter.next() {
426        match event {
427            Event::Start(Tag::Link { link_type, dest_url, title: _, id: _ })
428                if may_be_doc_link(link_type) =>
429            {
430                if matches!(
431                    link_type,
432                    LinkType::Inline
433                        | LinkType::ReferenceUnknown
434                        | LinkType::Reference
435                        | LinkType::Shortcut
436                        | LinkType::ShortcutUnknown
437                ) {
438                    if let Some(display_text) = collect_link_data(&mut event_iter) {
439                        links.push(display_text);
440                    }
441                }
442
443                links.push(preprocess_link(&dest_url));
444            }
445            _ => {}
446        }
447    }
448
449    links
450}
451
452/// Collects additional data of link.
453fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
454    event_iter: &mut Parser<'input, F>,
455) -> Option<Box<str>> {
456    let mut display_text: Option<String> = None;
457    let mut append_text = |text: CowStr<'_>| {
458        if let Some(display_text) = &mut display_text {
459            display_text.push_str(&text);
460        } else {
461            display_text = Some(text.to_string());
462        }
463    };
464
465    while let Some(event) = event_iter.next() {
466        match event {
467            Event::Text(text) => {
468                append_text(text);
469            }
470            Event::Code(code) => {
471                append_text(code);
472            }
473            Event::End(_) => {
474                break;
475            }
476            _ => {}
477        }
478    }
479
480    display_text.map(String::into_boxed_str)
481}
482
483/// Returns a span encompassing all the document fragments.
484pub fn span_of_fragments(fragments: &[DocFragment]) -> Option<Span> {
485    if fragments.is_empty() {
486        return None;
487    }
488    let start = fragments[0].span;
489    if start == DUMMY_SP {
490        return None;
491    }
492    let end = fragments.last().expect("no doc strings provided").span;
493    Some(start.to(end))
494}
495
/// Attempts to match a range of bytes from parsed markdown to a `Span` in the source code.
///
/// This function does not always work, because markdown bytes don't necessarily match source
/// bytes, like if escapes are used in the string. In this case, it returns `None`.
///
/// This function will return `Some` only if:
///
/// - The doc is made entirely from sugared doc comments, which cannot contain escapes
/// - The doc is entirely from a single doc fragment, with a string literal, exactly equal
/// - The doc comes from `include_str!`
///
/// NOTE(review): no dedicated `include_str!` handling is visible in this function; presumably
/// that case is covered by the single-fragment branch — confirm.
///
/// `md_range` is a byte range into `markdown`, and `fragments` are the doc fragments that
/// `markdown` was built from.
pub fn source_span_for_markdown_range(
    tcx: TyCtxt<'_>,
    markdown: &str,
    md_range: &Range<usize>,
    fragments: &[DocFragment],
) -> Option<Span> {
    // Fast path: a single raw `#[doc = ...]` fragment whose source text is the markdown itself
    // (ignoring trailing whitespace). Offsets into the markdown are then offsets into the span.
    if let &[fragment] = &fragments
        && fragment.kind == DocFragmentKind::RawDoc
        && let Ok(snippet) = tcx.sess.source_map().span_to_snippet(fragment.span)
        && snippet.trim_end() == markdown.trim_end()
        && let Ok(md_range_lo) = u32::try_from(md_range.start)
        && let Ok(md_range_hi) = u32::try_from(md_range.end)
    {
        // Single fragment with string that contains same bytes as doc.
        return Some(Span::new(
            fragment.span.lo() + rustc_span::BytePos(md_range_lo),
            fragment.span.lo() + rustc_span::BytePos(md_range_hi),
            fragment.span.ctxt(),
            fragment.span.parent(),
        ));
    }

    // Beyond the fast path, only docs made exclusively of sugared doc comments are handled.
    let is_all_sugared_doc = fragments.iter().all(|frag| frag.kind == DocFragmentKind::SugaredDoc);

    if !is_all_sugared_doc {
        return None;
    }

    // Source text covering all fragments; bail out if there is no usable span or snippet.
    let snippet = tcx.sess.source_map().span_to_snippet(span_of_fragments(fragments)?).ok()?;

    // Zero-based markdown lines on which the requested range starts and ends.
    let starting_line = markdown[..md_range.start].matches('\n').count();
    let ending_line = starting_line + markdown[md_range.start..md_range.end].matches('\n').count();

    // We use `split_terminator('\n')` instead of `lines()` when counting bytes so that we treat
    // CRLF and LF line endings the same way.
    let mut src_lines = snippet.split_terminator('\n');
    let md_lines = markdown.split_terminator('\n');

    // The number of bytes from the source span to the markdown span that are not part
    // of the markdown, like comment markers.
    // `start_bytes` accumulates them up to the start of the range, `end_bytes` the additional
    // ones found between the start and the end of the range.
    let mut start_bytes = 0;
    let mut end_bytes = 0;

    // Walk the markdown lines and the source lines in lockstep. For each markdown line, source
    // lines are consumed until one that contains it is found; returns `None` if the source
    // runs out of lines first.
    'outer: for (line_no, md_line) in md_lines.enumerate() {
        loop {
            let source_line = src_lines.next()?;
            match source_line.find(md_line) {
                Some(offset) => {
                    if line_no == starting_line {
                        // Only what precedes the markdown text on this line is extra.
                        start_bytes += offset;

                        if starting_line == ending_line {
                            break 'outer;
                        }
                    } else if line_no == ending_line {
                        end_bytes += offset;
                        break 'outer;
                    } else if line_no < starting_line {
                        // Line entirely before the range: everything in the source line that
                        // is not the markdown line is extra.
                        start_bytes += source_line.len() - md_line.len();
                    } else {
                        // Line strictly inside the range.
                        end_bytes += source_line.len() - md_line.len();
                    }
                    // This markdown line is matched; move on to the next one.
                    break;
                }
                None => {
                    // Since this is a source line that doesn't include a markdown line,
                    // we have to count the newline that we split from earlier.
                    if line_no <= starting_line {
                        start_bytes += source_line.len() + 1;
                    } else {
                        end_bytes += source_line.len() + 1;
                    }
                }
            }
        }
    }

    // Shift the markdown range by the extra source bytes to get offsets within the span
    // covering all fragments.
    Some(span_of_fragments(fragments)?.from_inner(InnerSpan::new(
        md_range.start + start_bytes,
        md_range.end + start_bytes + end_bytes,
    )))
}
587}