Skip to main content

rustdoc/html/
markdown.rs

1//! Markdown formatting for rustdoc.
2//!
3//! This module implements markdown formatting through the pulldown-cmark library.
4//!
5//! ```
6//! #![feature(rustc_private)]
7//!
8//! extern crate rustc_span;
9//!
10//! use rustc_span::edition::Edition;
11//! use rustdoc::html::markdown::{HeadingOffset, IdMap, Markdown, ErrorCodes};
12//!
13//! let s = "My *markdown* _text_";
14//! let mut id_map = IdMap::new();
15//! let md = Markdown {
16//!     content: s,
17//!     links: &[],
18//!     ids: &mut id_map,
19//!     error_codes: ErrorCodes::Yes,
20//!     edition: Edition::Edition2015,
21//!     playground: &None,
22//!     heading_offset: HeadingOffset::H2,
23//! };
24//! let mut html = String::new();
25//! md.write_into(&mut html).unwrap();
26//! // ... something using html
27//! ```
28
29use std::borrow::Cow;
30use std::collections::VecDeque;
31use std::fmt::{self, Write};
32use std::iter::Peekable;
33use std::ops::{ControlFlow, Range};
34use std::path::PathBuf;
35use std::str::{self, CharIndices};
36use std::sync::atomic::AtomicUsize;
37use std::sync::{Arc, Weak};
38
39use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
40use rustc_errors::{Diag, DiagMessage};
41use rustc_hir::def_id::LocalDefId;
42use rustc_middle::ty::TyCtxt;
43pub(crate) use rustc_resolve::rustdoc::main_body_opts;
44use rustc_resolve::rustdoc::pulldown_cmark::{
45    self, BrokenLink, CodeBlockKind, CowStr, Event, LinkType, Options, Parser, Tag, TagEnd, html,
46};
47use rustc_resolve::rustdoc::{DocFragment, may_be_doc_link, source_span_for_markdown_range};
48use rustc_span::edition::Edition;
49use rustc_span::{Span, Symbol};
50use tracing::{debug, trace};
51
52use crate::clean::RenderedLink;
53use crate::doctest;
54use crate::doctest::GlobalTestOptions;
55use crate::html::escape::{Escape, EscapeBodyText};
56use crate::html::highlight;
57use crate::html::length_limit::HtmlWithLimit;
58use crate::html::render::small_url_encode;
59use crate::html::toc::{Toc, TocBuilder};
60
61mod footnotes;
62#[cfg(test)]
63mod tests;
64
/// Deepest heading level that may be emitted; a heading level shifted by a
/// `HeadingOffset` is capped at this value.
const MAX_HEADER_LEVEL: u32 = 6;
66
67/// Options for rendering Markdown in summaries (e.g., in search results).
68pub(crate) fn summary_opts() -> Options {
69    Options::ENABLE_TABLES
70        | Options::ENABLE_FOOTNOTES
71        | Options::ENABLE_STRIKETHROUGH
72        | Options::ENABLE_TASKLISTS
73        | Options::ENABLE_SMART_PUNCTUATION
74}
75
/// How far rendered headings are shifted down.
///
/// The discriminant is the number added to a Markdown heading level when the
/// heading is rendered: `H1` adds nothing, while `H2` makes `# something`
/// render as an `<h2>`.
#[derive(Debug, Clone, Copy)]
pub enum HeadingOffset {
    H1 = 0,
    H2 = 1,
    H3 = 2,
    H4 = 3,
    H5 = 4,
    H6 = 5,
}
85
86/// When `to_string` is called, this struct will emit the HTML corresponding to
87/// the rendered version of the contained markdown string.
88pub struct Markdown<'a> {
89    pub content: &'a str,
90    /// A list of link replacements.
91    pub links: &'a [RenderedLink],
92    /// The current list of used header IDs.
93    pub ids: &'a mut IdMap,
94    /// Whether to allow the use of explicit error codes in doctest lang strings.
95    pub error_codes: ErrorCodes,
96    /// Default edition to use when parsing doctests (to add a `fn main`).
97    pub edition: Edition,
98    pub playground: &'a Option<Playground>,
99    /// Offset at which we render headings.
100    /// E.g. if `heading_offset: HeadingOffset::H2`, then `# something` renders an `<h2>`.
101    pub heading_offset: HeadingOffset,
102}
103/// A struct like `Markdown` that renders the markdown with a table of contents.
104pub(crate) struct MarkdownWithToc<'a> {
105    pub(crate) content: &'a str,
106    pub(crate) links: &'a [RenderedLink],
107    pub(crate) ids: &'a mut IdMap,
108    pub(crate) error_codes: ErrorCodes,
109    pub(crate) edition: Edition,
110    pub(crate) playground: &'a Option<Playground>,
111}
112
113/// A struct like `Markdown` that renders the markdown escaping HTML tags
114/// and includes no paragraph tags.
115pub(crate) struct MarkdownItemInfo<'a> {
116    pub(crate) content: &'a str,
117    pub(crate) links: &'a [RenderedLink],
118    pub(crate) ids: &'a mut IdMap,
119}
120
121/// A tuple struct like `Markdown` that renders only the first paragraph.
122pub(crate) struct MarkdownSummaryLine<'a>(pub &'a str, pub &'a [RenderedLink]);
123
/// Whether explicit error codes are allowed in doctest lang strings.
#[derive(Copy, Clone, PartialEq, Debug)]
pub enum ErrorCodes {
    Yes,
    No,
}

impl ErrorCodes {
    /// Maps `true` to `ErrorCodes::Yes` and `false` to `ErrorCodes::No`.
    pub(crate) fn from(b: bool) -> Self {
        if b { ErrorCodes::Yes } else { ErrorCodes::No }
    }

    /// Inverse of `from`: returns `true` only for `ErrorCodes::Yes`.
    pub(crate) fn as_bool(self) -> bool {
        matches!(self, ErrorCodes::Yes)
    }
}
145
/// A single doctest line, tagged with whether it appears in the HTML output.
///
/// Both kinds of line are part of the code used for documentation tests.
pub(crate) enum Line<'a> {
    /// Kept in the test code but left out of the rendered HTML.
    Hidden(&'a str),
    /// Present both in the test code and in the rendered HTML.
    Shown(Cow<'a, str>),
}

impl<'a> Line<'a> {
    /// Text to display in HTML, or `None` when the line is hidden.
    fn for_html(self) -> Option<Cow<'a, str>> {
        if let Line::Shown(visible) = self { Some(visible) } else { None }
    }

    /// Text to use as code, regardless of whether the line is hidden.
    pub(crate) fn for_code(self) -> Cow<'a, str> {
        match self {
            Line::Hidden(text) => text.into(),
            Line::Shown(text) => text,
        }
    }
}
169
170/// This function is used to handle the "hidden lines" (ie starting with `#`) in
171/// doctests. It also transforms `##` back into `#`.
172// FIXME: There is a minor inconsistency here. For lines that start with ##, we
173// have no easy way of removing a potential single space after the hashes, which
174// is done in the single # case. This inconsistency seems okay, if non-ideal. In
175// order to fix it we'd have to iterate to find the first non-# character, and
176// then reallocate to remove it; which would make us return a String.
177pub(crate) fn map_line(s: &str) -> Line<'_> {
178    let trimmed = s.trim();
179    if trimmed.starts_with("##") {
180        Line::Shown(Cow::Owned(s.replacen("##", "#", 1)))
181    } else if let Some(stripped) = trimmed.strip_prefix("# ") {
182        // # text
183        Line::Hidden(stripped)
184    } else if trimmed == "#" {
185        // We cannot handle '#text' because it could be #[attr].
186        Line::Hidden("")
187    } else {
188        Line::Shown(Cow::Borrowed(s))
189    }
190}
191
/// Maps one character of a title to its contribution to an id, if any.
///
/// Alphanumeric characters, `-` and `_` are kept (ASCII letters lowercased),
/// ASCII whitespace becomes `-`, and everything else is dropped.
///
/// "Hello, world!" -> "hello-world"
fn slugify(c: char) -> Option<char> {
    let is_kept = c.is_alphanumeric() || matches!(c, '-' | '_');
    if is_kept {
        // `to_ascii_lowercase` leaves non-ASCII characters untouched.
        return Some(c.to_ascii_lowercase());
    }
    // Deliberately not `is_ascii_whitespace`, which does not include U+000B.
    (c.is_ascii() && c.is_whitespace()).then_some('-')
}
204
205#[derive(Debug)]
206pub struct Playground {
207    pub crate_name: Option<Symbol>,
208    pub url: String,
209}
210
211/// Adds syntax highlighting and playground Run buttons to Rust code blocks.
212struct CodeBlocks<'p, 'a, I: Iterator<Item = Event<'a>>> {
213    inner: I,
214    check_error_codes: ErrorCodes,
215    edition: Edition,
216    // Information about the playground if a URL has been specified, containing an
217    // optional crate name and the URL.
218    playground: &'p Option<Playground>,
219}
220
221impl<'p, 'a, I: Iterator<Item = Event<'a>>> CodeBlocks<'p, 'a, I> {
222    fn new(
223        iter: I,
224        error_codes: ErrorCodes,
225        edition: Edition,
226        playground: &'p Option<Playground>,
227    ) -> Self {
228        CodeBlocks { inner: iter, check_error_codes: error_codes, edition, playground }
229    }
230}
231
232impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> {
233    type Item = Event<'a>;
234
235    fn next(&mut self) -> Option<Self::Item> {
236        let event = self.inner.next();
237        let Some(Event::Start(Tag::CodeBlock(kind))) = event else {
238            return event;
239        };
240
241        let mut original_text = String::new();
242        for event in &mut self.inner {
243            match event {
244                Event::End(TagEnd::CodeBlock) => break,
245                Event::Text(ref s) => {
246                    original_text.push_str(s);
247                }
248                _ => {}
249            }
250        }
251
252        let LangString { added_classes, compile_fail, should_panic, ignore, edition, .. } =
253            match kind {
254                CodeBlockKind::Fenced(ref lang) => {
255                    let parse_result =
256                        LangString::parse_without_check(lang, self.check_error_codes);
257                    if !parse_result.rust {
258                        let added_classes = parse_result.added_classes;
259                        let lang_string = if let Some(lang) = parse_result.unknown.first() {
260                            format!("language-{lang}")
261                        } else {
262                            String::new()
263                        };
264                        let whitespace = if added_classes.is_empty() { "" } else { " " };
265                        return Some(Event::Html(
266                            format!(
267                                "<div class=\"example-wrap\">\
268                                 <pre class=\"{lang_string}{whitespace}{added_classes}\">\
269                                     <code>{text}</code>\
270                                 </pre>\
271                             </div>",
272                                added_classes = added_classes.join(" "),
273                                text = Escape(original_text.trim_suffix('\n')),
274                            )
275                            .into(),
276                        ));
277                    }
278                    parse_result
279                }
280                CodeBlockKind::Indented => Default::default(),
281            };
282
283        let lines = original_text.lines().filter_map(|l| map_line(l).for_html());
284        let text = lines.intersperse("\n".into()).collect::<String>();
285
286        let explicit_edition = edition.is_some();
287        let edition = edition.unwrap_or(self.edition);
288
289        let playground_button = self.playground.as_ref().and_then(|playground| {
290            let krate = &playground.crate_name;
291            let url = &playground.url;
292            if url.is_empty() {
293                return None;
294            }
295            let test = original_text
296                .lines()
297                .map(|l| map_line(l).for_code())
298                .intersperse("\n".into())
299                .collect::<String>();
300            let krate = krate.as_ref().map(|s| s.as_str());
301
302            // FIXME: separate out the code to make a code block into runnable code
303            //        from the complicated doctest logic
304            let opts = GlobalTestOptions {
305                crate_name: krate.map(String::from).unwrap_or_default(),
306                no_crate_inject: false,
307                insert_indent_space: true,
308                args_file: PathBuf::new(),
309            };
310            let mut builder = doctest::BuildDocTestBuilder::new(&test).edition(edition);
311            if let Some(krate) = krate {
312                builder = builder.crate_name(krate);
313            }
314            let doctest = builder.build(None);
315            let (wrapped, _) = doctest.generate_unique_doctest(&test, false, &opts, krate);
316            let test = wrapped.to_string();
317            let channel = if test.contains("#![feature(") { "&amp;version=nightly" } else { "" };
318
319            let test_escaped = small_url_encode(test);
320            Some(format!(
321                "<a class=\"test-arrow\" \
322                    target=\"_blank\" \
323                    title=\"Run code\" \
324                    href=\"{url}?code={test_escaped}{channel}&amp;edition={edition}\"></a>",
325            ))
326        });
327
328        let tooltip = {
329            use highlight::Tooltip::*;
330
331            if ignore == Ignore::All {
332                Some(IgnoreAll)
333            } else if let Ignore::Some(platforms) = ignore {
334                Some(IgnoreSome(platforms))
335            } else if compile_fail {
336                Some(CompileFail)
337            } else if should_panic {
338                Some(ShouldPanic)
339            } else if explicit_edition {
340                Some(Edition(edition))
341            } else {
342                None
343            }
344        };
345
346        // insert newline to clearly separate it from the
347        // previous block so we can shorten the html output
348        let s = format!(
349            "\n{}",
350            highlight::render_example_with_highlighting(
351                &text,
352                tooltip.as_ref(),
353                playground_button.as_deref(),
354                &added_classes,
355            )
356        );
357        Some(Event::Html(s.into()))
358    }
359}
360
361/// Make headings links with anchor IDs and build up TOC.
362struct LinkReplacerInner<'a> {
363    links: &'a [RenderedLink],
364    shortcut_link: Option<&'a RenderedLink>,
365}
366
367struct LinkReplacer<'a, I: Iterator<Item = Event<'a>>> {
368    iter: I,
369    inner: LinkReplacerInner<'a>,
370}
371
372impl<'a, I: Iterator<Item = Event<'a>>> LinkReplacer<'a, I> {
373    fn new(iter: I, links: &'a [RenderedLink]) -> Self {
374        LinkReplacer { iter, inner: { LinkReplacerInner { links, shortcut_link: None } } }
375    }
376}
377
378// FIXME: Once we have specialized trait impl (for `Iterator` impl on `LinkReplacer`),
379// we can remove this type and move back `LinkReplacerInner` fields into `LinkReplacer`.
380struct SpannedLinkReplacer<'a, I: Iterator<Item = SpannedEvent<'a>>> {
381    iter: I,
382    inner: LinkReplacerInner<'a>,
383}
384
385impl<'a, I: Iterator<Item = SpannedEvent<'a>>> SpannedLinkReplacer<'a, I> {
386    fn new(iter: I, links: &'a [RenderedLink]) -> Self {
387        SpannedLinkReplacer { iter, inner: { LinkReplacerInner { links, shortcut_link: None } } }
388    }
389}
390
391impl<'a> LinkReplacerInner<'a> {
392    fn handle_event(&mut self, event: &mut Event<'a>) {
393        // Replace intra-doc links and remove disambiguators from shortcut links (`[fn@f]`).
394        match event {
395            // This is a shortcut link that was resolved by the broken_link_callback: `[fn@f]`
396            // Remove any disambiguator.
397            Event::Start(Tag::Link {
398                // [fn@f] or [fn@f][]
399                link_type: LinkType::ShortcutUnknown | LinkType::CollapsedUnknown,
400                dest_url,
401                title,
402                ..
403            }) => {
404                debug!("saw start of shortcut link to {dest_url} with title {title}");
405                // If this is a shortcut link, it was resolved by the broken_link_callback.
406                // So the URL will already be updated properly.
407                let link = self.links.iter().find(|&link| *link.href == **dest_url);
408                // Since this is an external iterator, we can't replace the inner text just yet.
409                // Store that we saw a link so we know to replace it later.
410                if let Some(link) = link {
411                    trace!("it matched");
412                    assert!(self.shortcut_link.is_none(), "shortcut links cannot be nested");
413                    self.shortcut_link = Some(link);
414                    if title.is_empty() && !link.tooltip.is_empty() {
415                        *title = CowStr::Borrowed(link.tooltip.as_ref());
416                    }
417                }
418            }
419            // Now that we're done with the shortcut link, don't replace any more text.
420            Event::End(TagEnd::Link) if self.shortcut_link.is_some() => {
421                debug!("saw end of shortcut link");
422                self.shortcut_link = None;
423            }
424            // Handle backticks in inline code blocks, but only if we're in the middle of a shortcut link.
425            // [`fn@f`]
426            Event::Code(text) => {
427                trace!("saw code {text}");
428                if let Some(link) = self.shortcut_link {
429                    // NOTE: this only replaces if the code block is the *entire* text.
430                    // If only part of the link has code highlighting, the disambiguator will not be removed.
431                    // e.g. [fn@`f`]
432                    // This is a limitation from `collect_intra_doc_links`: it passes a full link,
433                    // and does not distinguish at all between code blocks.
434                    // So we could never be sure we weren't replacing too much:
435                    // [fn@my_`f`unc] is treated the same as [my_func()] in that pass.
436                    //
437                    // NOTE: .get(1..len() - 1) is to strip the backticks
438                    if let Some(link) = self.links.iter().find(|l| {
439                        l.href == link.href
440                            && Some(&**text) == l.original_text.get(1..l.original_text.len() - 1)
441                    }) {
442                        debug!("replacing {text} with {new_text}", new_text = link.new_text);
443                        *text = CowStr::Borrowed(&link.new_text);
444                    }
445                }
446            }
447            // Replace plain text in links, but only in the middle of a shortcut link.
448            // [fn@f]
449            Event::Text(text) => {
450                trace!("saw text {text}");
451                if let Some(link) = self.shortcut_link {
452                    // NOTE: same limitations as `Event::Code`
453                    if let Some(link) = self
454                        .links
455                        .iter()
456                        .find(|l| l.href == link.href && **text == *l.original_text)
457                    {
458                        debug!("replacing {text} with {new_text}", new_text = link.new_text);
459                        *text = CowStr::Borrowed(&link.new_text);
460                    }
461                }
462            }
463            // If this is a link, but not a shortcut link,
464            // replace the URL, since the broken_link_callback was not called.
465            Event::Start(Tag::Link { dest_url, title, .. }) => {
466                if let Some(link) =
467                    self.links.iter().find(|&link| *link.original_text == **dest_url)
468                {
469                    *dest_url = CowStr::Borrowed(link.href.as_ref());
470                    if title.is_empty() && !link.tooltip.is_empty() {
471                        *title = CowStr::Borrowed(link.tooltip.as_ref());
472                    }
473                }
474            }
475            // Anything else couldn't have been a valid Rust path, so no need to replace the text.
476            _ => {}
477        }
478    }
479}
480
481impl<'a, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, I> {
482    type Item = Event<'a>;
483
484    fn next(&mut self) -> Option<Self::Item> {
485        let mut event = self.iter.next();
486        if let Some(ref mut event) = event {
487            self.inner.handle_event(event);
488        }
489        // Yield the modified event
490        event
491    }
492}
493
494impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for SpannedLinkReplacer<'a, I> {
495    type Item = SpannedEvent<'a>;
496
497    fn next(&mut self) -> Option<Self::Item> {
498        let (mut event, range) = self.iter.next()?;
499        self.inner.handle_event(&mut event);
500        // Yield the modified event
501        Some((event, range))
502    }
503}
504
505/// Wrap HTML tables into `<div>` to prevent having the doc blocks width being too big.
506struct TableWrapper<'a, I: Iterator<Item = Event<'a>>> {
507    inner: I,
508    stored_events: VecDeque<Event<'a>>,
509}
510
511impl<'a, I: Iterator<Item = Event<'a>>> TableWrapper<'a, I> {
512    fn new(iter: I) -> Self {
513        Self { inner: iter, stored_events: VecDeque::new() }
514    }
515}
516
517impl<'a, I: Iterator<Item = Event<'a>>> Iterator for TableWrapper<'a, I> {
518    type Item = Event<'a>;
519
520    fn next(&mut self) -> Option<Self::Item> {
521        if let Some(first) = self.stored_events.pop_front() {
522            return Some(first);
523        }
524
525        let event = self.inner.next()?;
526
527        Some(match event {
528            Event::Start(Tag::Table(t)) => {
529                self.stored_events.push_back(Event::Start(Tag::Table(t)));
530                Event::Html(CowStr::Borrowed("<div>"))
531            }
532            Event::End(TagEnd::Table) => {
533                self.stored_events.push_back(Event::Html(CowStr::Borrowed("</div>")));
534                Event::End(TagEnd::Table)
535            }
536            e => e,
537        })
538    }
539}
540
541type SpannedEvent<'a> = (Event<'a>, Range<usize>);
542
543/// Make headings links with anchor IDs and build up TOC.
544struct HeadingLinks<'a, 'b, 'ids, I> {
545    inner: I,
546    toc: Option<&'b mut TocBuilder>,
547    buf: VecDeque<SpannedEvent<'a>>,
548    id_map: &'ids mut IdMap,
549    heading_offset: HeadingOffset,
550}
551
552impl<'b, 'ids, I> HeadingLinks<'_, 'b, 'ids, I> {
553    fn new(
554        iter: I,
555        toc: Option<&'b mut TocBuilder>,
556        ids: &'ids mut IdMap,
557        heading_offset: HeadingOffset,
558    ) -> Self {
559        HeadingLinks { inner: iter, toc, buf: VecDeque::new(), id_map: ids, heading_offset }
560    }
561}
562
563impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for HeadingLinks<'a, '_, '_, I> {
564    type Item = SpannedEvent<'a>;
565
566    fn next(&mut self) -> Option<Self::Item> {
567        if let Some(e) = self.buf.pop_front() {
568            return Some(e);
569        }
570
571        let event = self.inner.next();
572        if let Some((Event::Start(Tag::Heading { level, .. }), _)) = event {
573            let mut id = String::new();
574            for event in &mut self.inner {
575                match &event.0 {
576                    Event::End(TagEnd::Heading(_)) => break,
577                    Event::Text(text) | Event::Code(text) => {
578                        id.extend(text.chars().filter_map(slugify));
579                        self.buf.push_back(event);
580                    }
581                    _ => self.buf.push_back(event),
582                }
583            }
584            let id = self.id_map.derive(id);
585            let percent_encoded_id = small_url_encode(id.clone());
586
587            if let Some(ref mut builder) = self.toc {
588                let mut text_header = String::new();
589                plain_text_from_events(self.buf.iter().map(|(ev, _)| ev.clone()), &mut text_header);
590                let mut html_header = String::new();
591                html_text_from_events(self.buf.iter().map(|(ev, _)| ev.clone()), &mut html_header);
592                let sec = builder.push(level as u32, text_header, html_header, id.clone());
593                self.buf.push_front((Event::Html(format!("{sec} ").into()), 0..0));
594            }
595
596            let level =
597                std::cmp::min(level as u32 + (self.heading_offset as u32), MAX_HEADER_LEVEL);
598            self.buf.push_back((Event::Html(format!("</h{level}>").into()), 0..0));
599
600            let start_tags = format!(
601                "<h{level} id=\"{id}\"><a class=\"doc-anchor\" href=\"#{percent_encoded_id}\">§</a>"
602            );
603            return Some((Event::Html(start_tags.into()), 0..0));
604        }
605        event
606    }
607}
608
609/// Extracts just the first paragraph.
610struct SummaryLine<'a, I: Iterator<Item = Event<'a>>> {
611    inner: I,
612    started: bool,
613    depth: u32,
614    skipped_tags: u32,
615}
616
617impl<'a, I: Iterator<Item = Event<'a>>> SummaryLine<'a, I> {
618    fn new(iter: I) -> Self {
619        SummaryLine { inner: iter, started: false, depth: 0, skipped_tags: 0 }
620    }
621}
622
623fn check_if_allowed_tag(t: &TagEnd) -> bool {
624    matches!(
625        t,
626        TagEnd::Paragraph
627            | TagEnd::Emphasis
628            | TagEnd::Strong
629            | TagEnd::Strikethrough
630            | TagEnd::Link
631            | TagEnd::BlockQuote
632    )
633}
634
635fn is_forbidden_tag(t: &TagEnd) -> bool {
636    matches!(
637        t,
638        TagEnd::CodeBlock
639            | TagEnd::Table
640            | TagEnd::TableHead
641            | TagEnd::TableRow
642            | TagEnd::TableCell
643            | TagEnd::FootnoteDefinition
644    )
645}
646
647impl<'a, I: Iterator<Item = Event<'a>>> Iterator for SummaryLine<'a, I> {
648    type Item = Event<'a>;
649
650    fn next(&mut self) -> Option<Self::Item> {
651        if self.started && self.depth == 0 {
652            return None;
653        }
654        if !self.started {
655            self.started = true;
656        }
657        if let Some(event) = self.inner.next() {
658            let mut is_start = true;
659            let is_allowed_tag = match event {
660                Event::Start(ref c) => {
661                    if is_forbidden_tag(&c.to_end()) {
662                        self.skipped_tags += 1;
663                        return None;
664                    }
665                    self.depth += 1;
666                    check_if_allowed_tag(&c.to_end())
667                }
668                Event::End(ref c) => {
669                    if is_forbidden_tag(c) {
670                        self.skipped_tags += 1;
671                        return None;
672                    }
673                    self.depth -= 1;
674                    is_start = false;
675                    check_if_allowed_tag(c)
676                }
677                Event::FootnoteReference(_) => {
678                    self.skipped_tags += 1;
679                    false
680                }
681                _ => true,
682            };
683            if !is_allowed_tag {
684                self.skipped_tags += 1;
685            }
686            return if !is_allowed_tag {
687                if is_start {
688                    Some(Event::Start(Tag::Paragraph))
689                } else {
690                    Some(Event::End(TagEnd::Paragraph))
691                }
692            } else {
693                Some(event)
694            };
695        }
696        None
697    }
698}
699
/// A line number relative to the first line of a piece of Markdown.
///
/// The value is a zero-based offset from the first Markdown line of a doc
/// comment or other source: if that first line is line 32 and the `MdRelLine`
/// is 3, the absolute line is 35.
pub(crate) struct MdRelLine {
    offset: usize,
}

impl MdRelLine {
    /// Wraps a zero-based line offset; see the struct docs.
    pub(crate) const fn new(offset: usize) -> Self {
        MdRelLine { offset }
    }

    /// Returns the zero-based line offset; see the struct docs.
    pub(crate) const fn offset(self) -> usize {
        self.offset
    }
}
721
722#[derive(Clone, Debug)]
723pub(crate) struct CodeLineMapping {
724    pub(crate) generated: Range<usize>,
725    pub(crate) original: Span,
726}
727
728pub(crate) fn find_testable_code<T: doctest::DocTestVisitor>(
729    doc: &str,
730    tests: &mut T,
731    error_codes: ErrorCodes,
732    extra_info: Option<&ExtraInfo<'_, '_>>,
733) {
734    find_codes(doc, tests, error_codes, extra_info, false)
735}
736
737pub(crate) fn find_codes<T: doctest::DocTestVisitor>(
738    doc: &str,
739    tests: &mut T,
740    error_codes: ErrorCodes,
741    extra_info: Option<&ExtraInfo<'_, '_>>,
742    include_non_rust: bool,
743) {
744    let mut parser = Parser::new_ext(doc, main_body_opts()).into_offset_iter();
745    let mut prev_offset = 0;
746    let mut nb_lines = 0;
747    let mut register_header = None;
748    while let Some((event, offset)) = parser.next() {
749        match event {
750            Event::Start(Tag::CodeBlock(kind)) => {
751                let block_info = match kind {
752                    CodeBlockKind::Fenced(ref lang) => {
753                        if lang.is_empty() {
754                            Default::default()
755                        } else {
756                            LangString::parse(lang, error_codes, extra_info)
757                        }
758                    }
759                    CodeBlockKind::Indented => Default::default(),
760                };
761                if !include_non_rust && !block_info.rust {
762                    continue;
763                }
764
765                let mut test_s = String::new();
766                let mut text_events = Vec::new();
767
768                while let Some((Event::Text(s), offset)) = parser.next() {
769                    let start = test_s.len();
770                    test_s.push_str(&s);
771                    text_events.push((start..test_s.len(), offset));
772                }
773                let (text, code_mappings) = map_code_block(doc, &test_s, &text_events, extra_info);
774
775                nb_lines += doc[prev_offset..offset.start].lines().count();
776                // If there are characters between the preceding line ending and
777                // this code block, `str::lines` will return an additional line,
778                // which we subtract here.
779                if nb_lines != 0 && !&doc[prev_offset..offset.start].ends_with('\n') {
780                    nb_lines -= 1;
781                }
782                let line = MdRelLine::new(nb_lines);
783                tests.visit_test(text, block_info, line, code_mappings);
784                prev_offset = offset.start;
785            }
786            Event::Start(Tag::Heading { level, .. }) => {
787                register_header = Some(level as u32);
788            }
789            Event::Text(ref s) if register_header.is_some() => {
790                let level = register_header.unwrap();
791                tests.visit_header(s, level);
792                register_header = None;
793            }
794            _ => {}
795        }
796    }
797}
798
799fn map_code_block(
800    doc: &str,
801    code: &str,
802    text_events: &[(Range<usize>, Range<usize>)],
803    extra_info: Option<&ExtraInfo<'_, '_>>,
804) -> (String, Vec<CodeLineMapping>) {
805    let mut text = String::new();
806    let mut code_mappings = Vec::new();
807    let mut code_line_start = 0;
808
809    for (line_index, line) in code.lines().enumerate() {
810        if line_index != 0 {
811            text.push('\n');
812        }
813
814        let generated_start = text.len();
815        let mapped_line = map_line(line).for_code();
816        text.push_str(&mapped_line);
817        let generated = generated_start..text.len();
818
819        if mapped_line.as_ref() == line
820            && let Some(extra_info) = extra_info
821            && let Some(fragments) = extra_info.fragments
822        {
823            let code_line = code_line_start..code_line_start + line.len();
824            if let Some(md_range) = markdown_range_for_code_range(text_events, code_line)
825                && let Some((original, _)) =
826                    source_span_for_markdown_range(extra_info.tcx, doc, &md_range, fragments)
827            {
828                code_mappings.push(CodeLineMapping { generated, original });
829            }
830        }
831
832        code_line_start += line.len() + 1;
833    }
834
835    (text, code_mappings)
836}
837
/// Translates a range within a code block's text into a range in the Markdown.
///
/// Each entry of `text_events` pairs a range in the code with the Markdown
/// range it came from. The first entry whose code range fully contains
/// `code_range` is used; the result is `code_range` shifted by the difference
/// between that entry's two starts. Returns `None` when no entry contains it.
fn markdown_range_for_code_range(
    text_events: &[(Range<usize>, Range<usize>)],
    code_range: Range<usize>,
) -> Option<Range<usize>> {
    for (code_span, md_span) in text_events {
        let is_contained =
            code_span.start <= code_range.start && code_range.end <= code_span.end;
        if is_contained {
            let start = md_span.start + code_range.start - code_span.start;
            let end = md_span.start + code_range.end - code_span.start;
            return Some(start..end);
        }
    }
    None
}
852
/// Context needed to report invalid code block attributes and to resolve source spans for
/// code block lines.
pub(crate) struct ExtraInfo<'doc, 'tcx> {
    /// Item whose HIR id the `INVALID_CODEBLOCK_ATTRIBUTES` lint is attached to.
    def_id: LocalDefId,
    /// Span at which the lint is reported.
    sp: Span,
    /// Compiler context used to emit lints and to resolve source spans.
    tcx: TyCtxt<'tcx>,
    /// Doc fragments, when available; `map_code_block` only produces line mappings if set.
    fragments: Option<&'doc [DocFragment]>,
}
859
860impl<'doc, 'tcx> ExtraInfo<'doc, 'tcx> {
861    pub(crate) fn new(
862        tcx: TyCtxt<'tcx>,
863        def_id: LocalDefId,
864        sp: Span,
865        fragments: Option<&'doc [DocFragment]>,
866    ) -> ExtraInfo<'doc, 'tcx> {
867        ExtraInfo { def_id, sp, tcx, fragments }
868    }
869
870    fn error_invalid_codeblock_attr(&self, msg: impl Into<DiagMessage>) {
871        self.error_invalid_codeblock_attr_with_help(msg, |_| {});
872    }
873
874    fn error_invalid_codeblock_attr_with_help(
875        &self,
876        msg: impl Into<DiagMessage>,
877        f: impl for<'a, 'b> FnOnce(&'b mut Diag<'a, ()>),
878    ) {
879        self.tcx.emit_node_span_lint(
880            crate::lint::INVALID_CODEBLOCK_ATTRIBUTES,
881            self.tcx.local_def_id_to_hir_id(self.def_id),
882            self.sp,
883            rustc_errors::DiagDecorator(|lint| {
884                lint.primary_message(msg);
885                f(lint);
886            }),
887        );
888    }
889}
890
/// Parsed form of a fenced code block's info string (the text after the opening fence).
#[derive(Eq, PartialEq, Clone, Debug)]
pub(crate) struct LangString {
    /// The info string exactly as written.
    pub(crate) original: String,
    /// Set by the `should_panic` tag.
    pub(crate) should_panic: bool,
    /// Set by the `no_run` tag, and also by `compile_fail`.
    pub(crate) no_run: bool,
    /// Set by `ignore` (everything) or `ignore-<x>` (selected values).
    pub(crate) ignore: Ignore,
    /// Whether the block is considered Rust code once all tags have been taken into account.
    pub(crate) rust: bool,
    /// Set by the `test_harness` tag.
    pub(crate) test_harness: bool,
    /// Set by the `compile_fail` tag.
    pub(crate) compile_fail: bool,
    /// Set by the `standalone_crate` tag.
    pub(crate) standalone_crate: bool,
    /// Tags of the form `E` followed by four digits, collected only when error-code checking
    /// is enabled.
    pub(crate) error_codes: Vec<String>,
    /// Edition parsed from an `edition…` tag, if it was a valid edition.
    pub(crate) edition: Option<Edition>,
    /// Classes added through `.class` or `class=value` inside an attribute block (`{}`).
    pub(crate) added_classes: Vec<String>,
    /// Tags that were not recognized.
    pub(crate) unknown: Vec<String>,
}
906
/// Which `ignore` tags a code block carries.
#[derive(Eq, PartialEq, Clone, Debug)]
pub(crate) enum Ignore {
    /// A bare `ignore` tag was given.
    All,
    /// No `ignore` tag was given (the default).
    None,
    /// One or more `ignore-<x>` tags were given; holds each `<x>`. Takes precedence over a
    /// bare `ignore`.
    Some(Vec<String>),
}
913
/// This is the parser for fenced codeblocks attributes.
///
/// It implements the following grammar as expressed in ABNF:
///
/// ```ABNF
/// lang-string = *(token-list / delimited-attribute-list / comment)
/// bareword = LEADINGCHAR *(CHAR)
/// bareword-without-leading-char = CHAR *(CHAR)
/// quoted-string = QUOTE *(NONQUOTE) QUOTE
/// token = bareword / quoted-string
/// token-without-leading-char = bareword-without-leading-char / quoted-string
/// sep = COMMA/WS *(COMMA/WS)
/// attribute = (DOT token)/(token EQUAL token-without-leading-char)
/// attribute-list = [sep] attribute *(sep attribute) [sep]
/// delimited-attribute-list = OPEN-CURLY-BRACKET attribute-list CLOSE-CURLY-BRACKET
/// token-list = [sep] token *(sep token) [sep]
/// comment = OPEN_PAREN *<all characters except closing parentheses> CLOSE_PAREN
///
/// OPEN_PAREN = "("
/// CLOSE_PAREN = ")"
/// OPEN-CURLY-BRACKET = "{"
/// CLOSE-CURLY-BRACKET = "}"
/// LEADINGCHAR = ALPHA | DIGIT | "_" | "-" | ":"
/// ; All ASCII punctuation except comma, quote, equals, backslash, grave (backquote) and braces.
/// ; Comma is used to separate language tokens, so it can't be used in one.
/// ; Quote is used to allow otherwise-disallowed characters in language tokens.
/// ; Equals is used to make key=value pairs in attribute blocks.
/// ; Backslash and grave are special Markdown characters.
/// ; Braces are used to start an attribute block.
/// CHAR = ALPHA | DIGIT | "_" | "-" | ":" | "." | "!" | "#" | "$" | "%" | "&" | "*" | "+" | "/" |
///        ";" | "<" | ">" | "?" | "@" | "^" | "|" | "~"
/// NONQUOTE = %x09 / %x20 / %x21 / %x23-7E ; TAB / SPACE / all printable characters except `"`
/// COMMA = ","
/// DOT = "."
/// EQUAL = "="
///
/// ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
/// DIGIT = %x30-39
/// WS = %x09 / " "
/// ```
pub(crate) struct TagIterator<'a, 'tcx> {
    /// Characters of `data` with their byte offsets, with one item of lookahead.
    inner: Peekable<CharIndices<'a>>,
    /// The complete string being tokenized; returned tokens are slices of it.
    data: &'a str,
    /// `true` while the iterator is between an opening `{` and its closing `}`.
    is_in_attribute_block: bool,
    /// When set, syntax errors are also reported as a lint through it.
    extra: Option<&'a ExtraInfo<'a, 'tcx>>,
    /// Set once an error has been found; the iterator yields `None` from then on.
    is_error: bool,
}
961
/// One item produced by [`TagIterator`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) enum LangStringToken<'a> {
    /// A bareword or quoted string found outside an attribute block, e.g. `rust`.
    LangToken(&'a str),
    /// A `.class` entry found inside an attribute block; holds the name without the dot.
    ClassAttribute(&'a str),
    /// A `key=value` entry found inside an attribute block; holds `(key, value)`.
    KeyValueAttribute(&'a str, &'a str),
}
968
/// Returns `true` if `c` may start a bareword (`LEADINGCHAR` in the grammar): an ASCII letter
/// or digit, `_`, `-` or `:`.
fn is_leading_char(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | ':')
}
972fn is_bareword_char(c: char) -> bool {
973    is_leading_char(c) || ".!#$%&*+/;<>?@^|~".contains(c)
974}
/// Returns `true` if `c` separates tokens: a space, a comma or a tab.
fn is_separator(c: char) -> bool {
    matches!(c, ' ' | ',' | '\t')
}
978
/// Byte offsets delimiting a token inside the string being tokenized; the token is
/// `data[start..end]`.
struct Indices {
    /// Offset of the first byte of the token.
    start: usize,
    /// Offset one past the last byte of the token.
    end: usize,
}
983
984impl<'a, 'tcx> TagIterator<'a, 'tcx> {
985    pub(crate) fn new(data: &'a str, extra: Option<&'a ExtraInfo<'a, 'tcx>>) -> Self {
986        Self {
987            inner: data.char_indices().peekable(),
988            data,
989            is_in_attribute_block: false,
990            extra,
991            is_error: false,
992        }
993    }
994
995    fn emit_error(&mut self, err: impl Into<DiagMessage>) {
996        if let Some(extra) = self.extra {
997            extra.error_invalid_codeblock_attr(err);
998        }
999        self.is_error = true;
1000    }
1001
1002    fn skip_separators(&mut self) -> Option<usize> {
1003        while let Some((pos, c)) = self.inner.peek() {
1004            if !is_separator(*c) {
1005                return Some(*pos);
1006            }
1007            self.inner.next();
1008        }
1009        None
1010    }
1011
1012    fn parse_string(&mut self, start: usize) -> Option<Indices> {
1013        for (pos, c) in self.inner.by_ref() {
1014            if c == '"' {
1015                return Some(Indices { start: start + 1, end: pos });
1016            }
1017        }
1018        self.emit_error("unclosed quote string `\"`");
1019        None
1020    }
1021
1022    fn parse_class(&mut self, start: usize) -> Option<LangStringToken<'a>> {
1023        while let Some((pos, c)) = self.inner.peek().copied() {
1024            if is_bareword_char(c) {
1025                self.inner.next();
1026            } else {
1027                let class = &self.data[start + 1..pos];
1028                if class.is_empty() {
1029                    self.emit_error(format!("unexpected `{c}` character after `.`"));
1030                    return None;
1031                } else if self.check_after_token() {
1032                    return Some(LangStringToken::ClassAttribute(class));
1033                } else {
1034                    return None;
1035                }
1036            }
1037        }
1038        let class = &self.data[start + 1..];
1039        if class.is_empty() {
1040            self.emit_error("missing character after `.`");
1041            None
1042        } else if self.check_after_token() {
1043            Some(LangStringToken::ClassAttribute(class))
1044        } else {
1045            None
1046        }
1047    }
1048
1049    fn parse_token(&mut self, start: usize) -> Option<Indices> {
1050        while let Some((pos, c)) = self.inner.peek() {
1051            if !is_bareword_char(*c) {
1052                return Some(Indices { start, end: *pos });
1053            }
1054            self.inner.next();
1055        }
1056        self.emit_error("unexpected end");
1057        None
1058    }
1059
1060    fn parse_key_value(&mut self, c: char, start: usize) -> Option<LangStringToken<'a>> {
1061        let key_indices =
1062            if c == '"' { self.parse_string(start)? } else { self.parse_token(start)? };
1063        if key_indices.start == key_indices.end {
1064            self.emit_error("unexpected empty string as key");
1065            return None;
1066        }
1067
1068        if let Some((_, c)) = self.inner.next() {
1069            if c != '=' {
1070                self.emit_error(format!("expected `=`, found `{c}`"));
1071                return None;
1072            }
1073        } else {
1074            self.emit_error("unexpected end");
1075            return None;
1076        }
1077        let value_indices = match self.inner.next() {
1078            Some((pos, '"')) => self.parse_string(pos)?,
1079            Some((pos, c)) if is_bareword_char(c) => self.parse_token(pos)?,
1080            Some((_, c)) => {
1081                self.emit_error(format!("unexpected `{c}` character after `=`"));
1082                return None;
1083            }
1084            None => {
1085                self.emit_error("expected value after `=`");
1086                return None;
1087            }
1088        };
1089        if value_indices.start == value_indices.end {
1090            self.emit_error("unexpected empty string as value");
1091            None
1092        } else if self.check_after_token() {
1093            Some(LangStringToken::KeyValueAttribute(
1094                &self.data[key_indices.start..key_indices.end],
1095                &self.data[value_indices.start..value_indices.end],
1096            ))
1097        } else {
1098            None
1099        }
1100    }
1101
1102    /// Returns `false` if an error was emitted.
1103    fn check_after_token(&mut self) -> bool {
1104        if let Some((_, c)) = self.inner.peek().copied() {
1105            if c == '}' || is_separator(c) || c == '(' {
1106                true
1107            } else {
1108                self.emit_error(format!("unexpected `{c}` character"));
1109                false
1110            }
1111        } else {
1112            // The error will be caught on the next iteration.
1113            true
1114        }
1115    }
1116
1117    fn parse_in_attribute_block(&mut self) -> Option<LangStringToken<'a>> {
1118        if let Some((pos, c)) = self.inner.next() {
1119            if c == '}' {
1120                self.is_in_attribute_block = false;
1121                return self.next();
1122            } else if c == '.' {
1123                return self.parse_class(pos);
1124            } else if c == '"' || is_leading_char(c) {
1125                return self.parse_key_value(c, pos);
1126            } else {
1127                self.emit_error(format!("unexpected character `{c}`"));
1128                return None;
1129            }
1130        }
1131        self.emit_error("unclosed attribute block (`{}`): missing `}` at the end");
1132        None
1133    }
1134
1135    /// Returns `false` if an error was emitted.
1136    fn skip_paren_block(&mut self) -> bool {
1137        for (_, c) in self.inner.by_ref() {
1138            if c == ')' {
1139                return true;
1140            }
1141        }
1142        self.emit_error("unclosed comment: missing `)` at the end");
1143        false
1144    }
1145
1146    fn parse_outside_attribute_block(&mut self, start: usize) -> Option<LangStringToken<'a>> {
1147        while let Some((pos, c)) = self.inner.next() {
1148            if c == '"' {
1149                if pos != start {
1150                    self.emit_error("expected ` `, `{` or `,` found `\"`");
1151                    return None;
1152                }
1153                let indices = self.parse_string(pos)?;
1154                if let Some((_, c)) = self.inner.peek().copied()
1155                    && c != '{'
1156                    && !is_separator(c)
1157                    && c != '('
1158                {
1159                    self.emit_error(format!("expected ` `, `{{` or `,` after `\"`, found `{c}`"));
1160                    return None;
1161                }
1162                return Some(LangStringToken::LangToken(&self.data[indices.start..indices.end]));
1163            } else if c == '{' {
1164                self.is_in_attribute_block = true;
1165                return self.next();
1166            } else if is_separator(c) {
1167                if pos != start {
1168                    return Some(LangStringToken::LangToken(&self.data[start..pos]));
1169                }
1170                return self.next();
1171            } else if c == '(' {
1172                if !self.skip_paren_block() {
1173                    return None;
1174                }
1175                if pos != start {
1176                    return Some(LangStringToken::LangToken(&self.data[start..pos]));
1177                }
1178                return self.next();
1179            } else if (pos == start && is_leading_char(c)) || (pos != start && is_bareword_char(c))
1180            {
1181                continue;
1182            } else {
1183                self.emit_error(format!("unexpected character `{c}`"));
1184                return None;
1185            }
1186        }
1187        let token = &self.data[start..];
1188        if token.is_empty() { None } else { Some(LangStringToken::LangToken(&self.data[start..])) }
1189    }
1190}
1191
1192impl<'a> Iterator for TagIterator<'a, '_> {
1193    type Item = LangStringToken<'a>;
1194
1195    fn next(&mut self) -> Option<Self::Item> {
1196        if self.is_error {
1197            return None;
1198        }
1199        let Some(start) = self.skip_separators() else {
1200            if self.is_in_attribute_block {
1201                self.emit_error("unclosed attribute block (`{}`): missing `}` at the end");
1202            }
1203            return None;
1204        };
1205        if self.is_in_attribute_block {
1206            self.parse_in_attribute_block()
1207        } else {
1208            self.parse_outside_attribute_block(start)
1209        }
1210    }
1211}
1212
1213impl Default for LangString {
1214    fn default() -> Self {
1215        Self {
1216            original: String::new(),
1217            should_panic: false,
1218            no_run: false,
1219            ignore: Ignore::None,
1220            rust: true,
1221            test_harness: false,
1222            compile_fail: false,
1223            standalone_crate: false,
1224            error_codes: Vec::new(),
1225            edition: None,
1226            added_classes: Vec::new(),
1227            unknown: Vec::new(),
1228        }
1229    }
1230}
1231
1232impl LangString {
1233    fn parse_without_check(string: &str, allow_error_code_check: ErrorCodes) -> Self {
1234        Self::parse(string, allow_error_code_check, None)
1235    }
1236
1237    fn parse(
1238        string: &str,
1239        allow_error_code_check: ErrorCodes,
1240        extra: Option<&ExtraInfo<'_, '_>>,
1241    ) -> Self {
1242        let allow_error_code_check = allow_error_code_check.as_bool();
1243        let mut seen_rust_tags = false;
1244        let mut seen_other_tags = false;
1245        let mut seen_custom_tag = false;
1246        let mut data = LangString::default();
1247        let mut ignores = vec![];
1248
1249        data.original = string.to_owned();
1250
1251        let mut call = |tokens: &mut dyn Iterator<Item = LangStringToken<'_>>| {
1252            for token in tokens {
1253                match token {
1254                    LangStringToken::LangToken("should_panic") => {
1255                        data.should_panic = true;
1256                        seen_rust_tags = !seen_other_tags;
1257                    }
1258                    LangStringToken::LangToken("no_run") => {
1259                        data.no_run = true;
1260                        seen_rust_tags = !seen_other_tags;
1261                    }
1262                    LangStringToken::LangToken("ignore") => {
1263                        data.ignore = Ignore::All;
1264                        seen_rust_tags = !seen_other_tags;
1265                    }
1266                    LangStringToken::LangToken(x)
1267                        if let Some(ignore) = x.strip_prefix("ignore-") =>
1268                    {
1269                        ignores.push(ignore.to_owned());
1270                        seen_rust_tags = !seen_other_tags;
1271                    }
1272                    LangStringToken::LangToken("rust") => {
1273                        data.rust = true;
1274                        seen_rust_tags = true;
1275                    }
1276                    LangStringToken::LangToken("custom") => {
1277                        seen_custom_tag = true;
1278                    }
1279                    LangStringToken::LangToken("test_harness") => {
1280                        data.test_harness = true;
1281                        seen_rust_tags = !seen_other_tags || seen_rust_tags;
1282                    }
1283                    LangStringToken::LangToken("compile_fail") => {
1284                        data.compile_fail = true;
1285                        seen_rust_tags = !seen_other_tags || seen_rust_tags;
1286                        data.no_run = true;
1287                    }
1288                    LangStringToken::LangToken("standalone_crate") => {
1289                        data.standalone_crate = true;
1290                        seen_rust_tags = !seen_other_tags || seen_rust_tags;
1291                    }
1292                    LangStringToken::LangToken(x)
1293                        if let Some(edition) = x.strip_prefix("edition") =>
1294                    {
1295                        data.edition = edition.parse::<Edition>().ok();
1296                    }
1297                    LangStringToken::LangToken(x)
1298                        if let Some(edition) = x.strip_prefix("rust")
1299                            && edition.parse::<Edition>().is_ok()
1300                            && let Some(extra) = extra =>
1301                    {
1302                        extra.error_invalid_codeblock_attr_with_help(
1303                            format!("unknown attribute `{x}`"),
1304                            |lint| {
1305                                lint.help(format!(
1306                                    "there is an attribute with a similar name: `edition{edition}`"
1307                                ));
1308                            },
1309                        );
1310                    }
1311                    LangStringToken::LangToken(x)
1312                        if allow_error_code_check
1313                            && let Some(error_code) = x.strip_prefix('E')
1314                            && error_code.len() == 4 =>
1315                    {
1316                        if error_code.parse::<u32>().is_ok() {
1317                            data.error_codes.push(x.to_owned());
1318                            seen_rust_tags = !seen_other_tags || seen_rust_tags;
1319                        } else {
1320                            seen_other_tags = true;
1321                        }
1322                    }
1323                    LangStringToken::LangToken(x) if let Some(extra) = extra => {
1324                        if let Some(help) = match x.to_lowercase().as_str() {
1325                            "compile-fail" | "compile_fail" | "compilefail" => Some(
1326                                "use `compile_fail` to invert the results of this test, so that it \
1327                                passes if it cannot be compiled and fails if it can",
1328                            ),
1329                            "should-panic" | "should_panic" | "shouldpanic" => Some(
1330                                "use `should_panic` to invert the results of this test, so that if \
1331                                passes if it panics and fails if it does not",
1332                            ),
1333                            "no-run" | "no_run" | "norun" => Some(
1334                                "use `no_run` to compile, but not run, the code sample during \
1335                                testing",
1336                            ),
1337                            "test-harness" | "test_harness" | "testharness" => Some(
1338                                "use `test_harness` to run functions marked `#[test]` instead of a \
1339                                potentially-implicit `main` function",
1340                            ),
1341                            "standalone" | "standalone_crate" | "standalone-crate"
1342                                if extra.sp.at_least_rust_2024() =>
1343                            {
1344                                Some(
1345                                    "use `standalone_crate` to compile this code block \
1346                                        separately",
1347                                )
1348                            }
1349                            _ => None,
1350                        } {
1351                            extra.error_invalid_codeblock_attr_with_help(
1352                                format!("unknown attribute `{x}`"),
1353                                |lint| {
1354                                    lint.help(help).help(
1355                                        "this code block may be skipped during testing, \
1356                                            because unknown attributes are treated as markers for \
1357                                            code samples written in other programming languages, \
1358                                            unless it is also explicitly marked as `rust`",
1359                                    );
1360                                },
1361                            );
1362                        }
1363                        seen_other_tags = true;
1364                        data.unknown.push(x.to_owned());
1365                    }
1366                    LangStringToken::LangToken(x) => {
1367                        seen_other_tags = true;
1368                        data.unknown.push(x.to_owned());
1369                    }
1370                    LangStringToken::KeyValueAttribute("class", value) => {
1371                        data.added_classes.push(value.to_owned());
1372                    }
1373                    LangStringToken::KeyValueAttribute(key, ..) if let Some(extra) = extra => {
1374                        extra
1375                            .error_invalid_codeblock_attr(format!("unsupported attribute `{key}`"));
1376                    }
1377                    LangStringToken::ClassAttribute(class) => {
1378                        data.added_classes.push(class.to_owned());
1379                    }
1380                    _ => {}
1381                }
1382            }
1383        };
1384
1385        let mut tag_iter = TagIterator::new(string, extra);
1386        call(&mut tag_iter);
1387
1388        // ignore-foo overrides ignore
1389        if !ignores.is_empty() {
1390            data.ignore = Ignore::Some(ignores);
1391        }
1392
1393        data.rust &= !seen_custom_tag && (!seen_other_tags || seen_rust_tags) && !tag_iter.is_error;
1394
1395        data
1396    }
1397}
1398
1399impl<'a> Markdown<'a> {
1400    pub fn write_into(self, f: impl fmt::Write) -> fmt::Result {
1401        // This is actually common enough to special-case
1402        if self.content.is_empty() {
1403            return Ok(());
1404        }
1405
1406        html::write_html_fmt(f, self.into_iter())
1407    }
1408
1409    fn into_iter(self) -> CodeBlocks<'a, 'a, impl Iterator<Item = Event<'a>>> {
1410        let Markdown {
1411            content: md,
1412            links,
1413            ids,
1414            error_codes: codes,
1415            edition,
1416            playground,
1417            heading_offset,
1418        } = self;
1419
1420        let replacer = move |broken_link: BrokenLink<'_>| {
1421            links
1422                .iter()
1423                .find(|link| *link.original_text == *broken_link.reference)
1424                .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1425        };
1426
1427        let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(replacer));
1428        let p = p.into_offset_iter();
1429
1430        ids.handle_footnotes(|ids, existing_footnotes| {
1431            let p = HeadingLinks::new(p, None, ids, heading_offset);
1432            let p = SpannedLinkReplacer::new(p, links);
1433            let p = footnotes::Footnotes::new(p, existing_footnotes);
1434            let p = TableWrapper::new(p.map(|(ev, _)| ev));
1435            CodeBlocks::new(p, codes, edition, playground)
1436        })
1437    }
1438
1439    /// Convert markdown to (summary, remaining) HTML.
1440    ///
1441    /// - The summary is the first top-level Markdown element (usually a paragraph, but potentially
1442    ///   any block).
1443    /// - The remaining docs contain everything after the summary.
1444    pub(crate) fn split_summary_and_content(self) -> (Option<String>, Option<String>) {
1445        if self.content.is_empty() {
1446            return (None, None);
1447        }
1448        let mut p = self.into_iter();
1449
1450        let mut event_level = 0;
1451        let mut summary_events = Vec::new();
1452        let mut get_next_tag = false;
1453
1454        let mut end_of_summary = false;
1455        while let Some(event) = p.next() {
1456            match event {
1457                Event::Start(_) => event_level += 1,
1458                Event::End(kind) => {
1459                    event_level -= 1;
1460                    if event_level == 0 {
1461                        // We're back at the "top" so it means we're done with the summary.
1462                        end_of_summary = true;
1463                        // We surround tables with `<div>` HTML tags so this is a special case.
1464                        get_next_tag = kind == TagEnd::Table;
1465                    }
1466                }
1467                _ => {}
1468            }
1469            summary_events.push(event);
1470            if end_of_summary {
1471                if get_next_tag && let Some(event) = p.next() {
1472                    summary_events.push(event);
1473                }
1474                break;
1475            }
1476        }
1477        let mut summary = String::new();
1478        html::push_html(&mut summary, summary_events.into_iter());
1479        if summary.is_empty() {
1480            return (None, None);
1481        }
1482        let mut content = String::new();
1483        html::push_html(&mut content, p);
1484
1485        if content.is_empty() { (Some(summary), None) } else { (Some(summary), Some(content)) }
1486    }
1487}
1488
1489impl MarkdownWithToc<'_> {
1490    pub(crate) fn into_parts(self) -> (Toc, String) {
1491        let MarkdownWithToc { content: md, links, ids, error_codes: codes, edition, playground } =
1492            self;
1493
1494        // This is actually common enough to special-case
1495        if md.is_empty() {
1496            return (Toc { entries: Vec::new() }, String::new());
1497        }
1498        let mut replacer = |broken_link: BrokenLink<'_>| {
1499            links
1500                .iter()
1501                .find(|link| *link.original_text == *broken_link.reference)
1502                .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1503        };
1504
1505        let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(&mut replacer));
1506        let p = p.into_offset_iter();
1507
1508        let mut s = String::with_capacity(md.len() * 3 / 2);
1509
1510        let mut toc = TocBuilder::new();
1511
1512        ids.handle_footnotes(|ids, existing_footnotes| {
1513            let p = HeadingLinks::new(p, Some(&mut toc), ids, HeadingOffset::H1);
1514            let p = footnotes::Footnotes::new(p, existing_footnotes);
1515            let p = TableWrapper::new(p.map(|(ev, _)| ev));
1516            let p = CodeBlocks::new(p, codes, edition, playground);
1517            html::push_html(&mut s, p);
1518        });
1519
1520        (toc.into_toc(), s)
1521    }
1522
1523    pub(crate) fn write_into(self, mut f: impl fmt::Write) -> fmt::Result {
1524        let (toc, s) = self.into_parts();
1525        write!(f, "<nav id=\"rustdoc\">{toc}</nav>{s}", toc = toc.print())
1526    }
1527}
1528
1529impl<'a> MarkdownItemInfo<'a> {
1530    pub(crate) fn new(content: &'a str, links: &'a [RenderedLink], ids: &'a mut IdMap) -> Self {
1531        Self { content, links, ids }
1532    }
1533
1534    pub(crate) fn write_into(self, mut f: impl fmt::Write) -> fmt::Result {
1535        let MarkdownItemInfo { content: md, links, ids } = self;
1536
1537        // This is actually common enough to special-case
1538        if md.is_empty() {
1539            return Ok(());
1540        }
1541
1542        let replacer = move |broken_link: BrokenLink<'_>| {
1543            links
1544                .iter()
1545                .find(|link| *link.original_text == *broken_link.reference)
1546                .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1547        };
1548
1549        let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(replacer));
1550        let p = p.into_offset_iter();
1551
1552        // Treat inline HTML as plain text.
1553        let p = p.map(|event| match event.0 {
1554            Event::Html(text) | Event::InlineHtml(text) => (Event::Text(text), event.1),
1555            _ => event,
1556        });
1557
1558        ids.handle_footnotes(|ids, existing_footnotes| {
1559            let p = HeadingLinks::new(p, None, ids, HeadingOffset::H1);
1560            let p = SpannedLinkReplacer::new(p, links);
1561            let p = footnotes::Footnotes::new(p, existing_footnotes);
1562            let p = TableWrapper::new(p.map(|(ev, _)| ev));
1563            // in legacy wrap mode, strip <p> elements to avoid them inserting newlines
1564            html::write_html_fmt(&mut f, p)?;
1565
1566            Ok(())
1567        })
1568    }
1569}
1570
1571impl MarkdownSummaryLine<'_> {
1572    pub(crate) fn into_string_with_has_more_content(self) -> (String, bool) {
1573        let MarkdownSummaryLine(md, links) = self;
1574        // This is actually common enough to special-case
1575        if md.is_empty() {
1576            return (String::new(), false);
1577        }
1578
1579        let mut replacer = |broken_link: BrokenLink<'_>| {
1580            links
1581                .iter()
1582                .find(|link| *link.original_text == *broken_link.reference)
1583                .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1584        };
1585
1586        let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer))
1587            .peekable();
1588        let mut summary = SummaryLine::new(p);
1589
1590        let mut s = String::new();
1591
1592        let without_paragraphs = LinkReplacer::new(&mut summary, links).filter(|event| {
1593            !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
1594        });
1595
1596        html::push_html(&mut s, without_paragraphs);
1597
1598        let has_more_content =
1599            matches!(summary.inner.peek(), Some(Event::Start(_))) || summary.skipped_tags > 0;
1600
1601        (s, has_more_content)
1602    }
1603
1604    pub(crate) fn into_string(self) -> String {
1605        self.into_string_with_has_more_content().0
1606    }
1607}
1608
1609/// Renders a subset of Markdown in the first paragraph of the provided Markdown.
1610///
1611/// - *Italics*, **bold**, and `inline code` styles **are** rendered.
1612/// - Headings and links are stripped (though the text *is* rendered).
1613/// - HTML, code blocks, and everything else are ignored.
1614///
1615/// Returns a tuple of the rendered HTML string and whether the output was shortened
1616/// due to the provided `length_limit`.
1617fn markdown_summary_with_limit(
1618    md: &str,
1619    link_names: &[RenderedLink],
1620    length_limit: usize,
1621) -> (String, bool) {
1622    if md.is_empty() {
1623        return (String::new(), false);
1624    }
1625
1626    let mut replacer = |broken_link: BrokenLink<'_>| {
1627        link_names
1628            .iter()
1629            .find(|link| *link.original_text == *broken_link.reference)
1630            .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1631    };
1632
1633    let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer));
1634    let mut p = LinkReplacer::new(p, link_names);
1635
1636    let mut buf = HtmlWithLimit::new(length_limit);
1637    let mut stopped_early = false;
1638    let _ = p.try_for_each(|event| {
1639        match &event {
1640            Event::Text(text) => {
1641                let r =
1642                    text.split_inclusive(char::is_whitespace).try_for_each(|word| buf.push(word));
1643                if r.is_break() {
1644                    stopped_early = true;
1645                }
1646                return r;
1647            }
1648            Event::Code(code) => {
1649                buf.open_tag("code");
1650                let r = buf.push(code);
1651                if r.is_break() {
1652                    stopped_early = true;
1653                } else {
1654                    buf.close_tag();
1655                }
1656                return r;
1657            }
1658            Event::Start(tag) => match tag {
1659                Tag::Emphasis => buf.open_tag("em"),
1660                Tag::Strong => buf.open_tag("strong"),
1661                Tag::CodeBlock(..) => return ControlFlow::Break(()),
1662                _ => {}
1663            },
1664            Event::End(tag) => match tag {
1665                TagEnd::Emphasis | TagEnd::Strong => buf.close_tag(),
1666                TagEnd::Paragraph | TagEnd::Heading(_) => return ControlFlow::Break(()),
1667                _ => {}
1668            },
1669            Event::HardBreak | Event::SoftBreak => buf.push(" ")?,
1670            _ => {}
1671        };
1672        ControlFlow::Continue(())
1673    });
1674
1675    (buf.finish(), stopped_early)
1676}
1677
1678/// Renders a shortened first paragraph of the given Markdown as a subset of Markdown,
1679/// making it suitable for contexts like the search index.
1680///
1681/// Will shorten to 59 or 60 characters, including an ellipsis (…) if it was shortened.
1682///
1683/// See [`markdown_summary_with_limit`] for details about what is rendered and what is not.
1684pub(crate) fn short_markdown_summary(markdown: &str, link_names: &[RenderedLink]) -> String {
1685    let (mut s, was_shortened) = markdown_summary_with_limit(markdown, link_names, 59);
1686
1687    if was_shortened {
1688        s.push('…');
1689    }
1690
1691    s
1692}
1693
1694/// Renders the first paragraph of the provided markdown as plain text.
1695/// Useful for alt-text.
1696///
1697/// - Headings, links, and formatting are stripped.
1698/// - Inline code is rendered as-is, surrounded by backticks.
1699/// - HTML and code blocks are ignored.
1700pub(crate) fn plain_text_summary(md: &str, link_names: &[RenderedLink]) -> String {
1701    if md.is_empty() {
1702        return String::new();
1703    }
1704
1705    let mut s = String::with_capacity(md.len() * 3 / 2);
1706
1707    let mut replacer = |broken_link: BrokenLink<'_>| {
1708        link_names
1709            .iter()
1710            .find(|link| *link.original_text == *broken_link.reference)
1711            .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1712    };
1713
1714    let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer));
1715
1716    plain_text_from_events(p, &mut s);
1717
1718    s
1719}
1720
1721pub(crate) fn plain_text_from_events<'a>(
1722    events: impl Iterator<Item = pulldown_cmark::Event<'a>>,
1723    s: &mut String,
1724) {
1725    for event in events {
1726        match &event {
1727            Event::Text(text) => s.push_str(text),
1728            Event::Code(code) => {
1729                s.push('`');
1730                s.push_str(code);
1731                s.push('`');
1732            }
1733            Event::HardBreak | Event::SoftBreak => s.push(' '),
1734            Event::Start(Tag::CodeBlock(..)) => break,
1735            Event::End(TagEnd::Paragraph) => break,
1736            Event::End(TagEnd::Heading(..)) => break,
1737            _ => (),
1738        }
1739    }
1740}
1741
1742pub(crate) fn html_text_from_events<'a>(
1743    events: impl Iterator<Item = pulldown_cmark::Event<'a>>,
1744    s: &mut String,
1745) {
1746    for event in events {
1747        match &event {
1748            Event::Text(text) => {
1749                write!(s, "{}", EscapeBodyText(text)).expect("string alloc infallible")
1750            }
1751            Event::Code(code) => {
1752                s.push_str("<code>");
1753                write!(s, "{}", EscapeBodyText(code)).expect("string alloc infallible");
1754                s.push_str("</code>");
1755            }
1756            Event::HardBreak | Event::SoftBreak => s.push(' '),
1757            Event::Start(Tag::CodeBlock(..)) => break,
1758            Event::End(TagEnd::Paragraph) => break,
1759            Event::End(TagEnd::Heading(..)) => break,
1760            _ => (),
1761        }
1762    }
1763}
1764
/// A link found in a markdown document, as passed to the `preprocess_link` callback of
/// `markdown_links`.
#[derive(Debug)]
pub(crate) struct MarkdownLink {
    /// The kind of link, as reported by the markdown parser. Reference definitions that no
    /// link refers to are reported as `LinkType::Reference`.
    pub kind: LinkType,
    /// The link's destination text.
    pub link: String,
    /// Where in the markdown source the link is located, for diagnostics.
    pub range: MarkdownLinkRange,
}
1771
/// The byte range in the markdown source that a link diagnostic should point at.
#[derive(Clone, Debug)]
pub(crate) enum MarkdownLinkRange {
    /// The usual case: the range covers only the link's destination.
    Destination(Range<usize>),
    /// The destination could not be pinpointed (typically because backslash escapes `\\`
    /// are involved), so the range covers the whole link instead. No structured
    /// suggestions should be offered for such a range.
    WholeLink(Range<usize>),
}

impl MarkdownLinkRange {
    /// Returns the wrapped range, regardless of which variant holds it.
    pub fn inner_range(&self) -> &Range<usize> {
        let (Self::Destination(range) | Self::WholeLink(range)) = self;
        range
    }
}
1791
1792pub(crate) fn markdown_links<'md, R>(
1793    md: &'md str,
1794    preprocess_link: impl Fn(MarkdownLink) -> Option<R>,
1795) -> Vec<R> {
1796    use itertools::Itertools;
1797    if md.is_empty() {
1798        return vec![];
1799    }
1800
1801    // FIXME: remove this function once pulldown_cmark can provide spans for link definitions.
1802    let locate = |s: &str, fallback: Range<usize>| unsafe {
1803        let s_start = s.as_ptr();
1804        let s_end = s_start.add(s.len());
1805        let md_start = md.as_ptr();
1806        let md_end = md_start.add(md.len());
1807        if md_start <= s_start && s_end <= md_end {
1808            let start = s_start.offset_from(md_start) as usize;
1809            let end = s_end.offset_from(md_start) as usize;
1810            MarkdownLinkRange::Destination(start..end)
1811        } else {
1812            MarkdownLinkRange::WholeLink(fallback)
1813        }
1814    };
1815
1816    let span_for_link = |link: &CowStr<'_>, span: Range<usize>| {
1817        // For diagnostics, we want to underline the link's definition but `span` will point at
1818        // where the link is used. This is a problem for reference-style links, where the definition
1819        // is separate from the usage.
1820
1821        match link {
1822            // `Borrowed` variant means the string (the link's destination) may come directly from
1823            // the markdown text and we can locate the original link destination.
1824            // NOTE: LinkReplacer also provides `Borrowed` but possibly from other sources,
1825            // so `locate()` can fall back to use `span`.
1826            CowStr::Borrowed(s) => locate(s, span),
1827
1828            // For anything else, we can only use the provided range.
1829            CowStr::Boxed(_) | CowStr::Inlined(_) => MarkdownLinkRange::WholeLink(span),
1830        }
1831    };
1832
1833    let span_for_refdef = |link: &CowStr<'_>, span: Range<usize>| {
1834        // We want to underline the link's definition, but `span` will point at the entire refdef.
1835        // Skip the label, then try to find the entire URL.
1836        let mut square_brace_count = 0;
1837        let mut iter = md.as_bytes()[span.start..span.end].iter().copied().enumerate();
1838        for (_i, c) in &mut iter {
1839            match c {
1840                b':' if square_brace_count == 0 => break,
1841                b'[' => square_brace_count += 1,
1842                b']' => square_brace_count -= 1,
1843                _ => {}
1844            }
1845        }
1846        while let Some((i, c)) = iter.next() {
1847            if c == b'<' {
1848                while let Some((j, c)) = iter.next() {
1849                    match c {
1850                        b'\\' => {
1851                            let _ = iter.next();
1852                        }
1853                        b'>' => {
1854                            return MarkdownLinkRange::Destination(
1855                                i + 1 + span.start..j + span.start,
1856                            );
1857                        }
1858                        _ => {}
1859                    }
1860                }
1861            } else if !c.is_ascii_whitespace() {
1862                for (j, c) in iter.by_ref() {
1863                    if c.is_ascii_whitespace() {
1864                        return MarkdownLinkRange::Destination(i + span.start..j + span.start);
1865                    }
1866                }
1867                return MarkdownLinkRange::Destination(i + span.start..span.end);
1868            }
1869        }
1870        span_for_link(link, span)
1871    };
1872
1873    let span_for_offset_backward = |span: Range<usize>, open: u8, close: u8| {
1874        let mut open_brace = !0;
1875        let mut close_brace = !0;
1876        for (i, b) in md.as_bytes()[span.clone()].iter().copied().enumerate().rev() {
1877            let i = i + span.start;
1878            if b == close {
1879                close_brace = i;
1880                break;
1881            }
1882        }
1883        if close_brace < span.start || close_brace >= span.end {
1884            return MarkdownLinkRange::WholeLink(span);
1885        }
1886        let mut nesting = 1;
1887        for (i, b) in md.as_bytes()[span.start..close_brace].iter().copied().enumerate().rev() {
1888            let i = i + span.start;
1889            if b == close {
1890                nesting += 1;
1891            }
1892            if b == open {
1893                nesting -= 1;
1894            }
1895            if nesting == 0 {
1896                open_brace = i;
1897                break;
1898            }
1899        }
1900        assert!(open_brace != close_brace);
1901        if open_brace < span.start || open_brace >= span.end {
1902            return MarkdownLinkRange::WholeLink(span);
1903        }
1904        // do not actually include braces in the span
1905        let range = (open_brace + 1)..close_brace;
1906        MarkdownLinkRange::Destination(range)
1907    };
1908
1909    let span_for_offset_forward = |span: Range<usize>, open: u8, close: u8| {
1910        let mut open_brace = !0;
1911        let mut close_brace = !0;
1912        for (i, b) in md.as_bytes()[span.clone()].iter().copied().enumerate() {
1913            let i = i + span.start;
1914            if b == open {
1915                open_brace = i;
1916                break;
1917            }
1918        }
1919        if open_brace < span.start || open_brace >= span.end {
1920            return MarkdownLinkRange::WholeLink(span);
1921        }
1922        let mut nesting = 0;
1923        for (i, b) in md.as_bytes()[open_brace..span.end].iter().copied().enumerate() {
1924            let i = i + open_brace;
1925            if b == close {
1926                nesting -= 1;
1927            }
1928            if b == open {
1929                nesting += 1;
1930            }
1931            if nesting == 0 {
1932                close_brace = i;
1933                break;
1934            }
1935        }
1936        assert!(open_brace != close_brace);
1937        if open_brace < span.start || open_brace >= span.end {
1938            return MarkdownLinkRange::WholeLink(span);
1939        }
1940        // do not actually include braces in the span
1941        let range = (open_brace + 1)..close_brace;
1942        MarkdownLinkRange::Destination(range)
1943    };
1944
1945    let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
1946    let event_iter = Parser::new_with_broken_link_callback(
1947        md,
1948        main_body_opts(),
1949        Some(&mut broken_link_callback),
1950    )
1951    .into_offset_iter();
1952    let mut links = Vec::new();
1953
1954    let mut refdefs = FxIndexMap::default();
1955    for (label, refdef) in event_iter.reference_definitions().iter().sorted_by_key(|x| x.0) {
1956        refdefs.insert(label.to_string(), (false, refdef.dest.to_string(), refdef.span.clone()));
1957    }
1958
1959    for (event, span) in event_iter {
1960        match event {
1961            Event::Start(Tag::Link { link_type, dest_url, id, .. })
1962                if may_be_doc_link(link_type) =>
1963            {
1964                let range = match link_type {
1965                    // Link is pulled from the link itself.
1966                    LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => {
1967                        span_for_offset_backward(span, b'[', b']')
1968                    }
1969                    LinkType::CollapsedUnknown => span_for_offset_forward(span, b'[', b']'),
1970                    LinkType::Inline => span_for_offset_backward(span, b'(', b')'),
1971                    // Link is pulled from elsewhere in the document.
1972                    LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => {
1973                        if let Some((is_used, dest_url, span)) = refdefs.get_mut(&id[..]) {
1974                            *is_used = true;
1975                            span_for_refdef(&CowStr::from(&dest_url[..]), span.clone())
1976                        } else {
1977                            span_for_link(&dest_url, span)
1978                        }
1979                    }
1980                    LinkType::Autolink | LinkType::Email => unreachable!(),
1981                };
1982
1983                if let Some(link) = preprocess_link(MarkdownLink {
1984                    kind: link_type,
1985                    link: dest_url.into_string(),
1986                    range,
1987                }) {
1988                    links.push(link);
1989                }
1990            }
1991            _ => {}
1992        }
1993    }
1994
1995    for (_label, (is_used, dest_url, span)) in refdefs.into_iter() {
1996        if !is_used
1997            && let Some(link) = preprocess_link(MarkdownLink {
1998                kind: LinkType::Reference,
1999                range: span_for_refdef(&CowStr::from(&dest_url[..]), span),
2000                link: dest_url,
2001            })
2002        {
2003            links.push(link);
2004        }
2005    }
2006
2007    links
2008}
2009
/// A code block found in markdown that is treated as Rust code, as returned by
/// `rust_code_blocks`.
#[derive(Debug)]
pub(crate) struct RustCodeBlock {
    /// The range in the markdown that the code block occupies. Note that this includes the fences
    /// for fenced code blocks.
    pub(crate) range: Range<usize>,
    /// The range in the markdown that the code within the code block occupies.
    pub(crate) code: Range<usize>,
    /// `true` for fenced code blocks, `false` for indented ones.
    pub(crate) is_fenced: bool,
    /// The language string of the block: parsed from the fence's info string when there is
    /// one, the default value otherwise.
    pub(crate) lang_string: LangString,
}
2020
2021/// Returns a range of bytes for each code block in the markdown that is tagged as `rust` or
2022/// untagged (and assumed to be rust).
2023pub(crate) fn rust_code_blocks(md: &str, extra_info: &ExtraInfo<'_, '_>) -> Vec<RustCodeBlock> {
2024    let mut code_blocks = vec![];
2025
2026    if md.is_empty() {
2027        return code_blocks;
2028    }
2029
2030    let mut p = Parser::new_ext(md, main_body_opts()).into_offset_iter();
2031
2032    while let Some((event, offset)) = p.next() {
2033        if let Event::Start(Tag::CodeBlock(syntax)) = event {
2034            let (lang_string, code_start, code_end, range, is_fenced) = match syntax {
2035                CodeBlockKind::Fenced(syntax) => {
2036                    let syntax = syntax.as_ref();
2037                    let lang_string = if syntax.is_empty() {
2038                        Default::default()
2039                    } else {
2040                        LangString::parse(syntax, ErrorCodes::Yes, Some(extra_info))
2041                    };
2042                    if !lang_string.rust {
2043                        continue;
2044                    }
2045                    let (code_start, mut code_end) = match p.next() {
2046                        Some((Event::Text(_), offset)) => (offset.start, offset.end),
2047                        Some((_, sub_offset)) => {
2048                            let code = Range { start: sub_offset.start, end: sub_offset.start };
2049                            code_blocks.push(RustCodeBlock {
2050                                is_fenced: true,
2051                                range: offset,
2052                                code,
2053                                lang_string,
2054                            });
2055                            continue;
2056                        }
2057                        None => {
2058                            let code = Range { start: offset.end, end: offset.end };
2059                            code_blocks.push(RustCodeBlock {
2060                                is_fenced: true,
2061                                range: offset,
2062                                code,
2063                                lang_string,
2064                            });
2065                            continue;
2066                        }
2067                    };
2068                    while let Some((Event::Text(_), offset)) = p.next() {
2069                        code_end = offset.end;
2070                    }
2071                    (lang_string, code_start, code_end, offset, true)
2072                }
2073                CodeBlockKind::Indented => {
2074                    // The ending of the offset goes too far sometime so we reduce it by one in
2075                    // these cases.
2076                    if offset.end > offset.start && md.get(offset.end..=offset.end) == Some("\n") {
2077                        (
2078                            LangString::default(),
2079                            offset.start,
2080                            offset.end,
2081                            Range { start: offset.start, end: offset.end - 1 },
2082                            false,
2083                        )
2084                    } else {
2085                        (LangString::default(), offset.start, offset.end, offset, false)
2086                    }
2087                }
2088            };
2089
2090            code_blocks.push(RustCodeBlock {
2091                is_fenced,
2092                range,
2093                code: Range { start: code_start, end: code_end },
2094                lang_string,
2095            });
2096        }
2097    }
2098
2099    code_blocks
2100}
2101
/// Bookkeeping used to hand out HTML ids (see `IdMap::derive`).
#[derive(Clone, Default, Debug)]
pub struct IdMap {
    /// For each id that has been requested or handed out, the numeric suffix to use the next
    /// time that same id is requested.
    map: FxHashMap<String, usize>,
    /// Shared counter handed out (as a `Weak`) by `handle_footnotes`.
    // NOTE(review): presumably the number of footnotes already rendered — confirm against
    // the footnote rendering code.
    existing_footnotes: Arc<AtomicUsize>,
}
2107
/// Returns `true` if `id` is one of the ids that rustdoc itself uses, which therefore must
/// not be handed out unchanged for user content.
fn is_default_id(id: &str) -> bool {
    const RESERVED_IDS: &[&str] = &[
        // This is the list of IDs used in JavaScript.
        "help",
        "settings",
        "not-displayed",
        "alternative-display",
        "search",
        "crate-search",
        "crate-search-div",
        // This is the list of IDs used in HTML generated in Rust (including the ones
        // used in askama template files).
        "themeStyle",
        "settings-menu",
        "help-button",
        "sidebar-button",
        "main-content",
        "toggle-all-docs",
        "all-types",
        "default-settings",
        "sidebar-vars",
        "copy-path",
        "rustdoc-toc",
        "rustdoc-modnav",
        // This is the list of IDs used by rustdoc sections (but still generated by
        // rustdoc).
        "fields",
        "variants",
        "implementors-list",
        "synthetic-implementors-list",
        "foreign-impls",
        "implementations",
        "trait-implementations",
        "synthetic-implementations",
        "blanket-implementations",
        "required-associated-types",
        "provided-associated-types",
        "provided-associated-consts",
        "required-associated-consts",
        "required-methods",
        "provided-methods",
        "dyn-compatibility",
        "implementors",
        "synthetic-implementors",
        "implementations-list",
        "trait-implementations-list",
        "synthetic-implementations-list",
        "blanket-implementations-list",
        "deref-methods",
        "layout",
        "aliased-type",
    ];

    RESERVED_IDS.contains(&id)
}
2162
2163impl IdMap {
2164    pub fn new() -> Self {
2165        IdMap { map: FxHashMap::default(), existing_footnotes: Arc::new(AtomicUsize::new(0)) }
2166    }
2167
2168    pub(crate) fn derive<S: AsRef<str> + ToString>(&mut self, candidate: S) -> String {
2169        let id = match self.map.get_mut(candidate.as_ref()) {
2170            None => {
2171                let candidate = candidate.to_string();
2172                if is_default_id(&candidate) {
2173                    let id = format!("{}-{}", candidate, 1);
2174                    self.map.insert(candidate, 2);
2175                    id
2176                } else {
2177                    candidate
2178                }
2179            }
2180            Some(a) => {
2181                let id = format!("{}-{}", candidate.as_ref(), *a);
2182                *a += 1;
2183                id
2184            }
2185        };
2186
2187        self.map.insert(id.clone(), 1);
2188        id
2189    }
2190
2191    /// Method to handle `existing_footnotes` increment automatically (to prevent forgetting
2192    /// about it).
2193    pub(crate) fn handle_footnotes<'a, T, F: FnOnce(&'a mut Self, Weak<AtomicUsize>) -> T>(
2194        &'a mut self,
2195        closure: F,
2196    ) -> T {
2197        let existing_footnotes = Arc::downgrade(&self.existing_footnotes);
2198
2199        closure(self, existing_footnotes)
2200    }
2201
2202    pub(crate) fn clear(&mut self) {
2203        self.map.clear();
2204        self.existing_footnotes = Arc::new(AtomicUsize::new(0));
2205    }
2206}