rustdoc/html/
markdown.rs

1//! Markdown formatting for rustdoc.
2//!
3//! This module implements markdown formatting through the pulldown-cmark library.
4//!
5//! ```
6//! #![feature(rustc_private)]
7//!
8//! extern crate rustc_span;
9//!
10//! use rustc_span::edition::Edition;
11//! use rustdoc::html::markdown::{HeadingOffset, IdMap, Markdown, ErrorCodes};
12//!
13//! let s = "My *markdown* _text_";
14//! let mut id_map = IdMap::new();
15//! let md = Markdown {
16//!     content: s,
17//!     links: &[],
18//!     ids: &mut id_map,
19//!     error_codes: ErrorCodes::Yes,
20//!     edition: Edition::Edition2015,
21//!     playground: &None,
22//!     heading_offset: HeadingOffset::H2,
23//! };
24//! let html = md.into_string();
25//! // ... something using html
26//! ```
27
28use std::borrow::Cow;
29use std::collections::VecDeque;
30use std::fmt::Write;
31use std::iter::Peekable;
32use std::ops::{ControlFlow, Range};
33use std::path::PathBuf;
34use std::str::{self, CharIndices};
35use std::sync::atomic::AtomicUsize;
36use std::sync::{Arc, Weak};
37
38use pulldown_cmark::{
39    BrokenLink, CodeBlockKind, CowStr, Event, LinkType, Options, Parser, Tag, TagEnd, html,
40};
41use rustc_data_structures::fx::FxHashMap;
42use rustc_errors::{Diag, DiagMessage};
43use rustc_hir::def_id::LocalDefId;
44use rustc_middle::ty::TyCtxt;
45pub(crate) use rustc_resolve::rustdoc::main_body_opts;
46use rustc_resolve::rustdoc::may_be_doc_link;
47use rustc_span::edition::Edition;
48use rustc_span::{Span, Symbol};
49use tracing::{debug, trace};
50
51use crate::clean::RenderedLink;
52use crate::doctest;
53use crate::doctest::GlobalTestOptions;
54use crate::html::escape::{Escape, EscapeBodyText};
55use crate::html::highlight;
56use crate::html::length_limit::HtmlWithLimit;
57use crate::html::render::small_url_encode;
58use crate::html::toc::{Toc, TocBuilder};
59
60mod footnotes;
61#[cfg(test)]
62mod tests;
63
64const MAX_HEADER_LEVEL: u32 = 6;
65
66/// Options for rendering Markdown in summaries (e.g., in search results).
67pub(crate) fn summary_opts() -> Options {
68    Options::ENABLE_TABLES
69        | Options::ENABLE_FOOTNOTES
70        | Options::ENABLE_STRIKETHROUGH
71        | Options::ENABLE_TASKLISTS
72        | Options::ENABLE_SMART_PUNCTUATION
73}
74
/// Shift applied to Markdown heading levels when they are rendered as HTML.
///
/// The discriminant (`H1` = 0 through `H6` = 5) is cast to `u32` and added to
/// the Markdown heading level by `HeadingLinks`; the sum is capped at
/// `MAX_HEADER_LEVEL`.
75#[derive(Debug, Clone, Copy)]
76pub enum HeadingOffset {
    /// No shift (offset 0).
77    H1 = 0,
    /// Shift by one level, e.g. `# something` renders an `<h2>`.
78    H2,
79    H3,
80    H4,
81    H5,
82    H6,
83}
84
85/// When `into_string` is called (as in the module-level example), this struct will
86/// emit the HTML corresponding to the rendered version of the contained markdown string.
87pub struct Markdown<'a> {
    /// The Markdown source text to render.
88    pub content: &'a str,
89    /// A list of link replacements.
90    pub links: &'a [RenderedLink],
91    /// The current list of used header IDs.
92    pub ids: &'a mut IdMap,
93    /// Whether to allow the use of explicit error codes in doctest lang strings.
94    pub error_codes: ErrorCodes,
95    /// Default edition to use when parsing doctests (to add a `fn main`).
96    pub edition: Edition,
    /// Playground configuration, if any.
    // NOTE(review): presumably forwarded to `CodeBlocks` to build "Run" links — confirm
    // against the rendering impl, which is outside this view.
97    pub playground: &'a Option<Playground>,
98    /// Offset at which we render headings.
99    /// E.g. if `heading_offset: HeadingOffset::H2`, then `# something` renders an `<h2>`.
100    pub heading_offset: HeadingOffset,
101}
102/// A struct like `Markdown` that renders the markdown with a table of contents.
///
/// The fields have the same names and types as the corresponding fields of
/// `Markdown`; there is no `heading_offset` field here.
103pub(crate) struct MarkdownWithToc<'a> {
    /// The Markdown source text to render.
104    pub(crate) content: &'a str,
    /// A list of link replacements.
105    pub(crate) links: &'a [RenderedLink],
    /// The current list of used header IDs.
106    pub(crate) ids: &'a mut IdMap,
    /// Whether to allow the use of explicit error codes in doctest lang strings.
107    pub(crate) error_codes: ErrorCodes,
    /// Default edition to use when parsing doctests.
108    pub(crate) edition: Edition,
    /// Playground configuration, if any.
109    pub(crate) playground: &'a Option<Playground>,
110}
111/// A tuple struct like `Markdown` that renders the markdown escaping HTML tags
112/// and includes no paragraph tags.
///
/// Field `0` is a string slice and field `1` a mutable `IdMap`.
// NOTE(review): presumably these play the roles of `Markdown::content` and
// `Markdown::ids` — confirm against the rendering impl.
113pub(crate) struct MarkdownItemInfo<'a>(pub(crate) &'a str, pub(crate) &'a mut IdMap);
114/// A tuple struct like `Markdown` that renders only the first paragraph.
///
/// Field `0` is a string slice and field `1` a slice of `RenderedLink`s.
// NOTE(review): presumably these play the roles of `Markdown::content` and
// `Markdown::links` — confirm against the rendering impl.
115pub(crate) struct MarkdownSummaryLine<'a>(pub &'a str, pub &'a [RenderedLink]);
116
/// Two-state flag saying whether explicit error codes are allowed in doctest
/// lang strings.
#[derive(Copy, Clone, PartialEq, Debug)]
pub enum ErrorCodes {
    Yes,
    No,
}

impl ErrorCodes {
    /// Maps `true` to [`ErrorCodes::Yes`] and `false` to [`ErrorCodes::No`].
    pub(crate) fn from(b: bool) -> Self {
        if b { ErrorCodes::Yes } else { ErrorCodes::No }
    }

    /// Inverse of [`ErrorCodes::from`]: `true` only for [`ErrorCodes::Yes`].
    pub(crate) fn as_bool(self) -> bool {
        matches!(self, ErrorCodes::Yes)
    }
}
138
/// A single doctest line, tagged with whether it appears in the HTML output.
///
/// Both kinds of line are part of the code used for documentation tests.
pub(crate) enum Line<'a> {
    Hidden(&'a str),
    Shown(Cow<'a, str>),
}

impl<'a> Line<'a> {
    /// Text to display in HTML: `Some` for shown lines, `None` for hidden ones.
    fn for_html(self) -> Option<Cow<'a, str>> {
        if let Line::Shown(text) = self { Some(text) } else { None }
    }

    /// Text to use as code: the line's content whether it is shown or hidden.
    pub(crate) fn for_code(self) -> Cow<'a, str> {
        match self {
            Line::Hidden(text) => Cow::Borrowed(text),
            Line::Shown(text) => text,
        }
    }
}
162
163/// This function is used to handle the "hidden lines" (ie starting with `#`) in
164/// doctests. It also transforms `##` back into `#`.
165// FIXME: There is a minor inconsistency here. For lines that start with ##, we
166// have no easy way of removing a potential single space after the hashes, which
167// is done in the single # case. This inconsistency seems okay, if non-ideal. In
168// order to fix it we'd have to iterate to find the first non-# character, and
169// then reallocate to remove it; which would make us return a String.
170pub(crate) fn map_line(s: &str) -> Line<'_> {
171    let trimmed = s.trim();
172    if trimmed.starts_with("##") {
173        Line::Shown(Cow::Owned(s.replacen("##", "#", 1)))
174    } else if let Some(stripped) = trimmed.strip_prefix("# ") {
175        // # text
176        Line::Hidden(stripped)
177    } else if trimmed == "#" {
178        // We cannot handle '#text' because it could be #[attr].
179        Line::Hidden("")
180    } else {
181        Line::Shown(Cow::Borrowed(s))
182    }
183}
184
/// Maps one character of a title to its contribution to an id, if any.
///
/// Alphanumerics, `-` and `_` are kept (ASCII letters lowercased), ASCII
/// whitespace becomes `-`, and every other character is dropped.
///
/// "Hello, world!" -> "hello-world"
fn slugify(c: char) -> Option<char> {
    let kept = c.is_alphanumeric() || matches!(c, '-' | '_');
    if kept {
        // `to_ascii_lowercase` leaves non-ASCII characters untouched.
        return Some(c.to_ascii_lowercase());
    }
    if c.is_ascii() && c.is_whitespace() {
        return Some('-');
    }
    None
}
197
/// Playground configuration used to build the "Run code" link on code blocks.
198#[derive(Clone, Debug)]
199pub struct Playground {
    /// Optional crate name, passed on to doctest generation when the link's
    /// code is built.
200    pub crate_name: Option<Symbol>,
    /// Base URL of the playground; `CodeBlocks` emits no link when it is empty.
201    pub url: String,
202}
203
204/// Adds syntax highlighting and playground Run buttons to Rust code blocks.
205struct CodeBlocks<'p, 'a, I: Iterator<Item = Event<'a>>> {
206    inner: I,
207    check_error_codes: ErrorCodes,
208    edition: Edition,
209    // Information about the playground if a URL has been specified, containing an
210    // optional crate name and the URL.
211    playground: &'p Option<Playground>,
212}
213
214impl<'p, 'a, I: Iterator<Item = Event<'a>>> CodeBlocks<'p, 'a, I> {
215    fn new(
216        iter: I,
217        error_codes: ErrorCodes,
218        edition: Edition,
219        playground: &'p Option<Playground>,
220    ) -> Self {
221        CodeBlocks { inner: iter, check_error_codes: error_codes, edition, playground }
222    }
223}
224
225impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> {
226    type Item = Event<'a>;
227
    /// Passes through every event except code blocks; a whole code block
    /// (start tag, text, end tag) is replaced by a single `Event::Html`
    /// containing its rendered form.
228    fn next(&mut self) -> Option<Self::Item> {
229        let event = self.inner.next();
        // Anything that is not the start of a code block is yielded untouched.
230        let Some(Event::Start(Tag::CodeBlock(kind))) = event else {
231            return event;
232        };
233
        // Drain the inner iterator up to the block's end tag, concatenating the
        // text events; any other event inside the block is dropped.
234        let mut original_text = String::new();
235        for event in &mut self.inner {
236            match event {
237                Event::End(TagEnd::CodeBlock) => break,
238                Event::Text(ref s) => {
239                    original_text.push_str(s);
240                }
241                _ => {}
242            }
243        }
244
        // Fenced blocks have their info string parsed; indented blocks use the
        // default `LangString`.
245        let LangString { added_classes, compile_fail, should_panic, ignore, edition, .. } =
246            match kind {
247                CodeBlockKind::Fenced(ref lang) => {
248                    let parse_result =
249                        LangString::parse_without_check(lang, self.check_error_codes, false);
                    // Non-Rust block: emit the escaped text in a plain `<pre>` (no
                    // highlighting, no playground link). The first unknown token, if
                    // any, becomes a `language-*` class.
250                    if !parse_result.rust {
251                        let added_classes = parse_result.added_classes;
252                        let lang_string = if let Some(lang) = parse_result.unknown.first() {
253                            format!("language-{}", lang)
254                        } else {
255                            String::new()
256                        };
257                        let whitespace = if added_classes.is_empty() { "" } else { " " };
258                        return Some(Event::Html(
259                            format!(
260                                "<div class=\"example-wrap\">\
261                                 <pre class=\"{lang_string}{whitespace}{added_classes}\">\
262                                     <code>{text}</code>\
263                                 </pre>\
264                             </div>",
265                                added_classes = added_classes.join(" "),
266                                text = Escape(
267                                    original_text.strip_suffix('\n').unwrap_or(&original_text)
268                                ),
269                            )
270                            .into(),
271                        ));
272                    }
273                    parse_result
274                }
275                CodeBlockKind::Indented => Default::default(),
276            };
277
        // The displayed text leaves out hidden lines (see `map_line`).
278        let lines = original_text.lines().filter_map(|l| map_line(l).for_html());
279        let text = lines.intersperse("\n".into()).collect::<String>();
280
        // An edition given in the lang string overrides the default one and is
        // what may later trigger the edition tooltip.
281        let explicit_edition = edition.is_some();
282        let edition = edition.unwrap_or(self.edition);
283
        // Build the "Run code" link, if a playground with a non-empty URL is configured.
284        let playground_button = self.playground.as_ref().and_then(|playground| {
285            let krate = &playground.crate_name;
286            let url = &playground.url;
287            if url.is_empty() {
288                return None;
289            }
            // Unlike the displayed text, the code sent to the playground keeps
            // the hidden lines.
290            let test = original_text
291                .lines()
292                .map(|l| map_line(l).for_code())
293                .intersperse("\n".into())
294                .collect::<String>();
295            let krate = krate.as_ref().map(|s| s.as_str());
296
297            // FIXME: separate out the code to make a code block into runnable code
298            //        from the complicated doctest logic
299            let opts = GlobalTestOptions {
300                crate_name: krate.map(String::from).unwrap_or_default(),
301                no_crate_inject: false,
302                insert_indent_space: true,
303                attrs: vec![],
304                args_file: PathBuf::new(),
305            };
306            let doctest = doctest::DocTestBuilder::new(&test, krate, edition, false, None, None);
307            let (test, _) = doctest.generate_unique_doctest(&test, false, &opts, krate);
            // Code containing `#![feature(` is sent to the nightly channel.
308            let channel = if test.contains("#![feature(") { "&amp;version=nightly" } else { "" };
309
310            let test_escaped = small_url_encode(test);
311            Some(format!(
312                "<a class=\"test-arrow\" \
313                    target=\"_blank\" \
314                    title=\"Run code\" \
315                    href=\"{url}?code={test_escaped}{channel}&amp;edition={edition}\"></a>",
316            ))
317        });
318
        // At most one tooltip is shown; precedence is ignore, then compile_fail,
        // then should_panic, then an explicitly requested edition.
319        let tooltip = if ignore != Ignore::None {
320            highlight::Tooltip::Ignore
321        } else if compile_fail {
322            highlight::Tooltip::CompileFail
323        } else if should_panic {
324            highlight::Tooltip::ShouldPanic
325        } else if explicit_edition {
326            highlight::Tooltip::Edition(edition)
327        } else {
328            highlight::Tooltip::None
329        };
330
331        // insert newline to clearly separate it from the
332        // previous block so we can shorten the html output
333        let mut s = String::new();
334        s.push('\n');
335
336        highlight::render_example_with_highlighting(
337            &text,
338            &mut s,
339            tooltip,
340            playground_button.as_deref(),
341            &added_classes,
342        );
343        Some(Event::Html(s.into()))
344    }
345}
346
347/// State shared by `LinkReplacer` and `SpannedLinkReplacer`: the link
/// replacements to apply and the shortcut link currently being processed.
348struct LinkReplacerInner<'a> {
    /// The link replacements to look events up in.
349    links: &'a [RenderedLink],
    /// Set between the start and end events of a shortcut link that matched an
    /// entry of `links`; `None` otherwise.
350    shortcut_link: Option<&'a RenderedLink>,
351}
352
353struct LinkReplacer<'a, I: Iterator<Item = Event<'a>>> {
354    iter: I,
355    inner: LinkReplacerInner<'a>,
356}
357
358impl<'a, I: Iterator<Item = Event<'a>>> LinkReplacer<'a, I> {
359    fn new(iter: I, links: &'a [RenderedLink]) -> Self {
360        LinkReplacer { iter, inner: { LinkReplacerInner { links, shortcut_link: None } } }
361    }
362}
363
364// FIXME: Once we have specialized trait impl (for `Iterator` impl on `LinkReplacer`),
365// we can remove this type and move back `LinkReplacerInner` fields into `LinkReplacer`.
366struct SpannedLinkReplacer<'a, I: Iterator<Item = SpannedEvent<'a>>> {
367    iter: I,
368    inner: LinkReplacerInner<'a>,
369}
370
371impl<'a, I: Iterator<Item = SpannedEvent<'a>>> SpannedLinkReplacer<'a, I> {
372    fn new(iter: I, links: &'a [RenderedLink]) -> Self {
373        SpannedLinkReplacer { iter, inner: { LinkReplacerInner { links, shortcut_link: None } } }
374    }
375}
376
377impl<'a> LinkReplacerInner<'a> {
    /// Rewrites `event` in place using `self.links`.
    ///
    /// Link start events get their destination and/or title updated; while a
    /// matched shortcut link is open (tracked in `self.shortcut_link`), text
    /// and inline-code events whose content equals a link's original text are
    /// replaced by that link's `new_text`. All other events are left alone.
378    fn handle_event(&mut self, event: &mut Event<'a>) {
379        // Replace intra-doc links and remove disambiguators from shortcut links (`[fn@f]`).
380        match event {
381            // This is a shortcut link that was resolved by the broken_link_callback: `[fn@f]`
382            // Remove any disambiguator.
383            Event::Start(Tag::Link {
384                // [fn@f] or [fn@f][]
385                link_type: LinkType::ShortcutUnknown | LinkType::CollapsedUnknown,
386                dest_url,
387                title,
388                ..
389            }) => {
390                debug!("saw start of shortcut link to {dest_url} with title {title}");
391                // If this is a shortcut link, it was resolved by the broken_link_callback.
392                // So the URL will already be updated properly.
393                let link = self.links.iter().find(|&link| *link.href == **dest_url);
394                // Since this is an external iterator, we can't replace the inner text just yet.
395                // Store that we saw a link so we know to replace it later.
396                if let Some(link) = link {
397                    trace!("it matched");
398                    assert!(self.shortcut_link.is_none(), "shortcut links cannot be nested");
399                    self.shortcut_link = Some(link);
                    // Only fill in the tooltip when the author gave no title.
400                    if title.is_empty() && !link.tooltip.is_empty() {
401                        *title = CowStr::Borrowed(link.tooltip.as_ref());
402                    }
403                }
404            }
405            // Now that we're done with the shortcut link, don't replace any more text.
406            Event::End(TagEnd::Link) if self.shortcut_link.is_some() => {
407                debug!("saw end of shortcut link");
408                self.shortcut_link = None;
409            }
410            // Handle backticks in inline code blocks, but only if we're in the middle of a shortcut link.
411            // [`fn@f`]
412            Event::Code(text) => {
413                trace!("saw code {text}");
414                if let Some(link) = self.shortcut_link {
415                    // NOTE: this only replaces if the code block is the *entire* text.
416                    // If only part of the link has code highlighting, the disambiguator will not be removed.
417                    // e.g. [fn@`f`]
418                    // This is a limitation from `collect_intra_doc_links`: it passes a full link,
419                    // and does not distinguish at all between code blocks.
420                    // So we could never be sure we weren't replacing too much:
421                    // [fn@my_`f`unc] is treated the same as [my_func()] in that pass.
422                    //
423                    // NOTE: .get(1..len() - 1) is to strip the backticks
                    // NOTE(review): `len() - 1` would underflow if `original_text` were
                    // empty — presumably it never is; confirm against the link collector.
424                    if let Some(link) = self.links.iter().find(|l| {
425                        l.href == link.href
426                            && Some(&**text) == l.original_text.get(1..l.original_text.len() - 1)
427                    }) {
428                        debug!("replacing {text} with {new_text}", new_text = link.new_text);
429                        *text = CowStr::Borrowed(&link.new_text);
430                    }
431                }
432            }
433            // Replace plain text in links, but only in the middle of a shortcut link.
434            // [fn@f]
435            Event::Text(text) => {
436                trace!("saw text {text}");
437                if let Some(link) = self.shortcut_link {
438                    // NOTE: same limitations as `Event::Code`
439                    if let Some(link) = self
440                        .links
441                        .iter()
442                        .find(|l| l.href == link.href && **text == *l.original_text)
443                    {
444                        debug!("replacing {text} with {new_text}", new_text = link.new_text);
445                        *text = CowStr::Borrowed(&link.new_text);
446                    }
447                }
448            }
449            // If this is a link, but not a shortcut link,
450            // replace the URL, since the broken_link_callback was not called.
            // (This arm must stay after the shortcut-link arm above, which matches
            // a subset of the same events.)
451            Event::Start(Tag::Link { dest_url, title, .. }) => {
452                if let Some(link) =
453                    self.links.iter().find(|&link| *link.original_text == **dest_url)
454                {
455                    *dest_url = CowStr::Borrowed(link.href.as_ref());
456                    if title.is_empty() && !link.tooltip.is_empty() {
457                        *title = CowStr::Borrowed(link.tooltip.as_ref());
458                    }
459                }
460            }
461            // Anything else couldn't have been a valid Rust path, so no need to replace the text.
462            _ => {}
463        }
464    }
465}
466
467impl<'a, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, I> {
468    type Item = Event<'a>;
469
470    fn next(&mut self) -> Option<Self::Item> {
471        let mut event = self.iter.next();
472        if let Some(ref mut event) = event {
473            self.inner.handle_event(event);
474        }
475        // Yield the modified event
476        event
477    }
478}
479
480impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for SpannedLinkReplacer<'a, I> {
481    type Item = SpannedEvent<'a>;
482
483    fn next(&mut self) -> Option<Self::Item> {
484        let (mut event, range) = self.iter.next()?;
485        self.inner.handle_event(&mut event);
486        // Yield the modified event
487        Some((event, range))
488    }
489}
490
491/// Wrap HTML tables into `<div>` to prevent having the doc blocks width being too big.
492struct TableWrapper<'a, I: Iterator<Item = Event<'a>>> {
493    inner: I,
494    stored_events: VecDeque<Event<'a>>,
495}
496
497impl<'a, I: Iterator<Item = Event<'a>>> TableWrapper<'a, I> {
498    fn new(iter: I) -> Self {
499        Self { inner: iter, stored_events: VecDeque::new() }
500    }
501}
502
503impl<'a, I: Iterator<Item = Event<'a>>> Iterator for TableWrapper<'a, I> {
504    type Item = Event<'a>;
505
506    fn next(&mut self) -> Option<Self::Item> {
507        if let Some(first) = self.stored_events.pop_front() {
508            return Some(first);
509        }
510
511        let event = self.inner.next()?;
512
513        Some(match event {
514            Event::Start(Tag::Table(t)) => {
515                self.stored_events.push_back(Event::Start(Tag::Table(t)));
516                Event::Html(CowStr::Borrowed("<div>"))
517            }
518            Event::End(TagEnd::Table) => {
519                self.stored_events.push_back(Event::Html(CowStr::Borrowed("</div>")));
520                Event::End(TagEnd::Table)
521            }
522            e => e,
523        })
524    }
525}
526
/// A Markdown event paired with a `Range<usize>`.
// NOTE(review): presumably the range is the event's byte range in the Markdown
// source, as yielded by an offset iterator — confirm at the construction site.
// Synthetic events created in this file use `0..0`.
527type SpannedEvent<'a> = (Event<'a>, Range<usize>);
528
529/// Make headings links with anchor IDs and build up TOC.
530struct HeadingLinks<'a, 'b, 'ids, I> {
531    inner: I,
532    toc: Option<&'b mut TocBuilder>,
533    buf: VecDeque<SpannedEvent<'a>>,
534    id_map: &'ids mut IdMap,
535    heading_offset: HeadingOffset,
536}
537
538impl<'b, 'ids, I> HeadingLinks<'_, 'b, 'ids, I> {
539    fn new(
540        iter: I,
541        toc: Option<&'b mut TocBuilder>,
542        ids: &'ids mut IdMap,
543        heading_offset: HeadingOffset,
544    ) -> Self {
545        HeadingLinks { inner: iter, toc, buf: VecDeque::new(), id_map: ids, heading_offset }
546    }
547}
548
549impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for HeadingLinks<'a, '_, '_, I> {
550    type Item = SpannedEvent<'a>;
551
552    fn next(&mut self) -> Option<Self::Item> {
553        if let Some(e) = self.buf.pop_front() {
554            return Some(e);
555        }
556
557        let event = self.inner.next();
558        if let Some((Event::Start(Tag::Heading { level, .. }), _)) = event {
559            let mut id = String::new();
560            for event in &mut self.inner {
561                match &event.0 {
562                    Event::End(TagEnd::Heading(_)) => break,
563                    Event::Text(text) | Event::Code(text) => {
564                        id.extend(text.chars().filter_map(slugify));
565                        self.buf.push_back(event);
566                    }
567                    _ => self.buf.push_back(event),
568                }
569            }
570            let id = self.id_map.derive(id);
571
572            if let Some(ref mut builder) = self.toc {
573                let mut text_header = String::new();
574                plain_text_from_events(self.buf.iter().map(|(ev, _)| ev.clone()), &mut text_header);
575                let mut html_header = String::new();
576                html_text_from_events(self.buf.iter().map(|(ev, _)| ev.clone()), &mut html_header);
577                let sec = builder.push(level as u32, text_header, html_header, id.clone());
578                self.buf.push_front((Event::Html(format!("{sec} ").into()), 0..0));
579            }
580
581            let level =
582                std::cmp::min(level as u32 + (self.heading_offset as u32), MAX_HEADER_LEVEL);
583            self.buf.push_back((Event::Html(format!("</h{level}>").into()), 0..0));
584
585            let start_tags =
586                format!("<h{level} id=\"{id}\"><a class=\"doc-anchor\" href=\"#{id}\">§</a>");
587            return Some((Event::Html(start_tags.into()), 0..0));
588        }
589        event
590    }
591}
592
593/// Extracts just the first paragraph.
594struct SummaryLine<'a, I: Iterator<Item = Event<'a>>> {
595    inner: I,
596    started: bool,
597    depth: u32,
598    skipped_tags: u32,
599}
600
601impl<'a, I: Iterator<Item = Event<'a>>> SummaryLine<'a, I> {
602    fn new(iter: I) -> Self {
603        SummaryLine { inner: iter, started: false, depth: 0, skipped_tags: 0 }
604    }
605}
606
607fn check_if_allowed_tag(t: &TagEnd) -> bool {
608    matches!(
609        t,
610        TagEnd::Paragraph
611            | TagEnd::Emphasis
612            | TagEnd::Strong
613            | TagEnd::Strikethrough
614            | TagEnd::Link
615            | TagEnd::BlockQuote
616    )
617}
618
619fn is_forbidden_tag(t: &TagEnd) -> bool {
620    matches!(
621        t,
622        TagEnd::CodeBlock
623            | TagEnd::Table
624            | TagEnd::TableHead
625            | TagEnd::TableRow
626            | TagEnd::TableCell
627            | TagEnd::FootnoteDefinition
628    )
629}
630
631impl<'a, I: Iterator<Item = Event<'a>>> Iterator for SummaryLine<'a, I> {
632    type Item = Event<'a>;
633
634    fn next(&mut self) -> Option<Self::Item> {
635        if self.started && self.depth == 0 {
636            return None;
637        }
638        if !self.started {
639            self.started = true;
640        }
641        if let Some(event) = self.inner.next() {
642            let mut is_start = true;
643            let is_allowed_tag = match event {
644                Event::Start(ref c) => {
645                    if is_forbidden_tag(&c.to_end()) {
646                        self.skipped_tags += 1;
647                        return None;
648                    }
649                    self.depth += 1;
650                    check_if_allowed_tag(&c.to_end())
651                }
652                Event::End(ref c) => {
653                    if is_forbidden_tag(c) {
654                        self.skipped_tags += 1;
655                        return None;
656                    }
657                    self.depth -= 1;
658                    is_start = false;
659                    check_if_allowed_tag(c)
660                }
661                Event::FootnoteReference(_) => {
662                    self.skipped_tags += 1;
663                    false
664                }
665                _ => true,
666            };
667            if !is_allowed_tag {
668                self.skipped_tags += 1;
669            }
670            return if !is_allowed_tag {
671                if is_start {
672                    Some(Event::Start(Tag::Paragraph))
673                } else {
674                    Some(Event::End(TagEnd::Paragraph))
675                }
676            } else {
677                Some(event)
678            };
679        }
680        None
681    }
682}
683
/// A line number in Markdown, relative to the Markdown's first line.
///
/// It is a zero-based offset: if the first Markdown line of a doc comment (or
/// other source) is on line 32 and the `MdRelLine` is 3, the absolute line is 35.
pub(crate) struct MdRelLine {
    offset: usize,
}

impl MdRelLine {
    /// Wraps a zero-based line offset; see the struct docs.
    pub(crate) const fn new(offset: usize) -> Self {
        MdRelLine { offset }
    }

    /// Returns the wrapped zero-based line offset; see the struct docs.
    pub(crate) const fn offset(self) -> usize {
        self.offset
    }
}
705
706pub(crate) fn find_testable_code<T: doctest::DocTestVisitor>(
707    doc: &str,
708    tests: &mut T,
709    error_codes: ErrorCodes,
710    enable_per_target_ignores: bool,
711    extra_info: Option<&ExtraInfo<'_>>,
712) {
713    find_codes(doc, tests, error_codes, enable_per_target_ignores, extra_info, false)
714}
715
/// Walks the Markdown in `doc`, reporting each code block to
/// `tests.visit_test` and each heading's first text to `tests.visit_header`.
///
/// When `include_non_rust` is `false`, code blocks whose parsed lang string is
/// not Rust are skipped. Each reported block carries its line, relative to the
/// start of `doc`, as an `MdRelLine`.
716pub(crate) fn find_codes<T: doctest::DocTestVisitor>(
717    doc: &str,
718    tests: &mut T,
719    error_codes: ErrorCodes,
720    enable_per_target_ignores: bool,
721    extra_info: Option<&ExtraInfo<'_>>,
722    include_non_rust: bool,
723) {
724    let mut parser = Parser::new(doc).into_offset_iter();
    // `nb_lines` is the running line count up to the last reported code block;
    // `prev_offset` is the byte offset where that count stopped.
725    let mut prev_offset = 0;
726    let mut nb_lines = 0;
    // Level of a heading whose text has not been reported yet.
727    let mut register_header = None;
    // Not a `for` loop: the parser is also advanced inside the body.
728    while let Some((event, offset)) = parser.next() {
729        match event {
730            Event::Start(Tag::CodeBlock(kind)) => {
                // An empty info string and indented blocks both use the default
                // `LangString`.
731                let block_info = match kind {
732                    CodeBlockKind::Fenced(ref lang) => {
733                        if lang.is_empty() {
734                            Default::default()
735                        } else {
736                            LangString::parse(
737                                lang,
738                                error_codes,
739                                enable_per_target_ignores,
740                                extra_info,
741                            )
742                        }
743                    }
744                    CodeBlockKind::Indented => Default::default(),
745                };
746                if !include_non_rust && !block_info.rust {
747                    continue;
748                }
749
750                let mut test_s = String::new();
751
                // Collect the block's text. The first non-text event ends the loop
                // and is consumed by it.
752                while let Some((Event::Text(s), _)) = parser.next() {
753                    test_s.push_str(&s);
754                }
                // Hidden lines are kept: the test needs the full code.
755                let text = test_s
756                    .lines()
757                    .map(|l| map_line(l).for_code())
758                    .collect::<Vec<Cow<'_, str>>>()
759                    .join("\n");
760
761                nb_lines += doc[prev_offset..offset.start].lines().count();
762                // If there are characters between the preceding line ending and
763                // this code block, `str::lines` will return an additional line,
764                // which we subtract here.
765                if nb_lines != 0 && !&doc[prev_offset..offset.start].ends_with('\n') {
766                    nb_lines -= 1;
767                }
768                let line = MdRelLine::new(nb_lines);
769                tests.visit_test(text, block_info, line);
770                prev_offset = offset.start;
771            }
772            Event::Start(Tag::Heading { level, .. }) => {
773                register_header = Some(level as u32);
774            }
            // Only the first text event after a heading start is reported.
775            Event::Text(ref s) if register_header.is_some() => {
776                let level = register_header.unwrap();
777                tests.visit_header(s, level);
778                register_header = None;
779            }
780            _ => {}
781        }
782    }
783}
784
785pub(crate) struct ExtraInfo<'tcx> {
786    def_id: LocalDefId,
787    sp: Span,
788    tcx: TyCtxt<'tcx>,
789}
790
791impl<'tcx> ExtraInfo<'tcx> {
792    pub(crate) fn new(tcx: TyCtxt<'tcx>, def_id: LocalDefId, sp: Span) -> ExtraInfo<'tcx> {
793        ExtraInfo { def_id, sp, tcx }
794    }
795
796    fn error_invalid_codeblock_attr(&self, msg: impl Into<DiagMessage>) {
797        self.tcx.node_span_lint(
798            crate::lint::INVALID_CODEBLOCK_ATTRIBUTES,
799            self.tcx.local_def_id_to_hir_id(self.def_id),
800            self.sp,
801            |lint| {
802                lint.primary_message(msg);
803            },
804        );
805    }
806
807    fn error_invalid_codeblock_attr_with_help(
808        &self,
809        msg: impl Into<DiagMessage>,
810        f: impl for<'a, 'b> FnOnce(&'b mut Diag<'a, ()>),
811    ) {
812        self.tcx.node_span_lint(
813            crate::lint::INVALID_CODEBLOCK_ATTRIBUTES,
814            self.tcx.local_def_id_to_hir_id(self.def_id),
815            self.sp,
816            |lint| {
817                lint.primary_message(msg);
818                f(lint);
819            },
820        );
821    }
822}
823
/// The parsed form of a code block's lang string.
// NOTE(review): fields without a comment below are not read anywhere in the
// visible part of this file; document them next to the parser.
824#[derive(Eq, PartialEq, Clone, Debug)]
825pub(crate) struct LangString {
826    pub(crate) original: String,
    /// Selects the "should panic" tooltip in `CodeBlocks`.
827    pub(crate) should_panic: bool,
828    pub(crate) no_run: bool,
    /// Anything other than `Ignore::None` selects the "ignore" tooltip in `CodeBlocks`.
829    pub(crate) ignore: Ignore,
    /// Whether the block is Rust code; non-Rust blocks are rendered without
    /// highlighting and are skipped by `find_testable_code`.
830    pub(crate) rust: bool,
831    pub(crate) test_harness: bool,
    /// Selects the "compile fail" tooltip in `CodeBlocks`.
832    pub(crate) compile_fail: bool,
833    pub(crate) standalone_crate: bool,
834    pub(crate) error_codes: Vec<String>,
    /// Edition requested by the lang string, overriding the default one.
835    pub(crate) edition: Option<Edition>,
    /// Extra CSS classes put on the rendered block.
836    pub(crate) added_classes: Vec<String>,
    /// Unrecognized tokens; for non-Rust blocks the first one becomes the
    /// `language-*` class.
837    pub(crate) unknown: Vec<String>,
838}
839
/// Whether (and when) a code block is ignored.
///
/// `CodeBlocks` shows the "ignore" tooltip for every value except `None`.
840#[derive(Eq, PartialEq, Clone, Debug)]
841pub(crate) enum Ignore {
842    All,
843    None,
    // NOTE(review): presumably the list of targets for per-target ignores
    // (see `enable_per_target_ignores`) — confirm against the parser.
844    Some(Vec<String>),
845}
846
847/// This is the parser for fenced codeblocks attributes. It implements the following eBNF:
848///
849/// ```eBNF
850/// lang-string = *(token-list / delimited-attribute-list / comment)
851///
852/// bareword = LEADINGCHAR *(CHAR)
853/// bareword-without-leading-char = CHAR *(CHAR)
854/// quoted-string = QUOTE *(NONQUOTE) QUOTE
855/// token = bareword / quoted-string
856/// token-without-leading-char = bareword-without-leading-char / quoted-string
857/// sep = COMMA/WS *(COMMA/WS)
858/// attribute = (DOT token)/(token EQUAL token-without-leading-char)
859/// attribute-list = [sep] attribute *(sep attribute) [sep]
860/// delimited-attribute-list = OPEN-CURLY-BRACKET attribute-list CLOSE-CURLY-BRACKET
861/// token-list = [sep] token *(sep token) [sep]
862/// comment = OPEN_PAREN *(all characters) CLOSE_PAREN
863///
864/// OPEN_PAREN = "("
865/// CLOSE_PAREN = ")"
866/// OPEN-CURLY-BRACKET = "{"
867/// CLOSE-CURLY-BRACKET = "}"
868/// LEADINGCHAR = ALPHA | DIGIT | "_" | "-" | ":"
869/// ; All ASCII punctuation except comma, quote, equals, backslash, grave (backquote) and braces.
870/// ; Comma is used to separate language tokens, so it can't be used in one.
871/// ; Quote is used to allow otherwise-disallowed characters in language tokens.
872/// ; Equals is used to make key=value pairs in attribute blocks.
873/// ; Backslash and grave are special Markdown characters.
874/// ; Braces are used to start an attribute block.
875/// CHAR = ALPHA | DIGIT | "_" | "-" | ":" | "." | "!" | "#" | "$" | "%" | "&" | "*" | "+" | "/" |
876///        ";" | "<" | ">" | "?" | "@" | "^" | "|" | "~"
877/// NONQUOTE = %x09 / %x20 / %x21 / %x23-7E ; TAB / SPACE / all printable characters except `"`
878/// COMMA = ","
879/// DOT = "."
880/// EQUAL = "="
881///
882/// ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
883/// DIGIT = %x30-39
884/// WS = %x09 / " "
885/// ```
886pub(crate) struct TagIterator<'a, 'tcx> {
    // NOTE(review): the iterator implementation is outside this view; the field
    // notes below are inferred from names and types only — confirm.
    // Presumably a cursor over the characters of `data`.
887    inner: Peekable<CharIndices<'a>>,
    // Presumably the lang string being parsed.
888    data: &'a str,
889    is_in_attribute_block: bool,
    // Optional lint context (see `ExtraInfo`).
890    extra: Option<&'a ExtraInfo<'tcx>>,
891    is_error: bool,
892}
893
/// A single item produced by [`TagIterator`]. All string slices borrow from the info string
/// being tokenized.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) enum LangStringToken<'a> {
    /// A bare or quoted token found outside of an attribute block (`{...}`).
    LangToken(&'a str),
    /// A `.class` attribute found inside an attribute block; holds the text after the dot.
    ClassAttribute(&'a str),
    /// A `key=value` attribute found inside an attribute block, as `(key, value)`.
    KeyValueAttribute(&'a str, &'a str),
}
900
/// Returns `true` if `c` may start a bareword (`LEADINGCHAR` in the lang-string grammar):
/// an ASCII letter or digit, `_`, `-` or `:`.
fn is_leading_char(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | ':')
}
904fn is_bareword_char(c: char) -> bool {
905    is_leading_char(c) || ".!#$%&*+/;<>?@^|~".contains(c)
906}
/// Returns `true` if `c` separates tokens in a lang string: a space, a comma or a tab.
fn is_separator(c: char) -> bool {
    matches!(c, ' ' | ',' | '\t')
}
910
/// A half-open byte range (`start..end`) into the info string held by a [`TagIterator`].
struct Indices {
    /// Byte offset of the first byte of the slice.
    start: usize,
    /// Byte offset one past the last byte of the slice.
    end: usize,
}
915
916impl<'a, 'tcx> TagIterator<'a, 'tcx> {
917    pub(crate) fn new(data: &'a str, extra: Option<&'a ExtraInfo<'tcx>>) -> Self {
918        Self {
919            inner: data.char_indices().peekable(),
920            data,
921            is_in_attribute_block: false,
922            extra,
923            is_error: false,
924        }
925    }
926
927    fn emit_error(&mut self, err: impl Into<DiagMessage>) {
928        if let Some(extra) = self.extra {
929            extra.error_invalid_codeblock_attr(err);
930        }
931        self.is_error = true;
932    }
933
934    fn skip_separators(&mut self) -> Option<usize> {
935        while let Some((pos, c)) = self.inner.peek() {
936            if !is_separator(*c) {
937                return Some(*pos);
938            }
939            self.inner.next();
940        }
941        None
942    }
943
944    fn parse_string(&mut self, start: usize) -> Option<Indices> {
945        for (pos, c) in self.inner.by_ref() {
946            if c == '"' {
947                return Some(Indices { start: start + 1, end: pos });
948            }
949        }
950        self.emit_error("unclosed quote string `\"`");
951        None
952    }
953
954    fn parse_class(&mut self, start: usize) -> Option<LangStringToken<'a>> {
955        while let Some((pos, c)) = self.inner.peek().copied() {
956            if is_bareword_char(c) {
957                self.inner.next();
958            } else {
959                let class = &self.data[start + 1..pos];
960                if class.is_empty() {
961                    self.emit_error(format!("unexpected `{c}` character after `.`"));
962                    return None;
963                } else if self.check_after_token() {
964                    return Some(LangStringToken::ClassAttribute(class));
965                } else {
966                    return None;
967                }
968            }
969        }
970        let class = &self.data[start + 1..];
971        if class.is_empty() {
972            self.emit_error("missing character after `.`");
973            None
974        } else if self.check_after_token() {
975            Some(LangStringToken::ClassAttribute(class))
976        } else {
977            None
978        }
979    }
980
981    fn parse_token(&mut self, start: usize) -> Option<Indices> {
982        while let Some((pos, c)) = self.inner.peek() {
983            if !is_bareword_char(*c) {
984                return Some(Indices { start, end: *pos });
985            }
986            self.inner.next();
987        }
988        self.emit_error("unexpected end");
989        None
990    }
991
992    fn parse_key_value(&mut self, c: char, start: usize) -> Option<LangStringToken<'a>> {
993        let key_indices =
994            if c == '"' { self.parse_string(start)? } else { self.parse_token(start)? };
995        if key_indices.start == key_indices.end {
996            self.emit_error("unexpected empty string as key");
997            return None;
998        }
999
1000        if let Some((_, c)) = self.inner.next() {
1001            if c != '=' {
1002                self.emit_error(format!("expected `=`, found `{}`", c));
1003                return None;
1004            }
1005        } else {
1006            self.emit_error("unexpected end");
1007            return None;
1008        }
1009        let value_indices = match self.inner.next() {
1010            Some((pos, '"')) => self.parse_string(pos)?,
1011            Some((pos, c)) if is_bareword_char(c) => self.parse_token(pos)?,
1012            Some((_, c)) => {
1013                self.emit_error(format!("unexpected `{c}` character after `=`"));
1014                return None;
1015            }
1016            None => {
1017                self.emit_error("expected value after `=`");
1018                return None;
1019            }
1020        };
1021        if value_indices.start == value_indices.end {
1022            self.emit_error("unexpected empty string as value");
1023            None
1024        } else if self.check_after_token() {
1025            Some(LangStringToken::KeyValueAttribute(
1026                &self.data[key_indices.start..key_indices.end],
1027                &self.data[value_indices.start..value_indices.end],
1028            ))
1029        } else {
1030            None
1031        }
1032    }
1033
1034    /// Returns `false` if an error was emitted.
1035    fn check_after_token(&mut self) -> bool {
1036        if let Some((_, c)) = self.inner.peek().copied() {
1037            if c == '}' || is_separator(c) || c == '(' {
1038                true
1039            } else {
1040                self.emit_error(format!("unexpected `{c}` character"));
1041                false
1042            }
1043        } else {
1044            // The error will be caught on the next iteration.
1045            true
1046        }
1047    }
1048
1049    fn parse_in_attribute_block(&mut self) -> Option<LangStringToken<'a>> {
1050        if let Some((pos, c)) = self.inner.next() {
1051            if c == '}' {
1052                self.is_in_attribute_block = false;
1053                return self.next();
1054            } else if c == '.' {
1055                return self.parse_class(pos);
1056            } else if c == '"' || is_leading_char(c) {
1057                return self.parse_key_value(c, pos);
1058            } else {
1059                self.emit_error(format!("unexpected character `{c}`"));
1060                return None;
1061            }
1062        }
1063        self.emit_error("unclosed attribute block (`{}`): missing `}` at the end");
1064        None
1065    }
1066
1067    /// Returns `false` if an error was emitted.
1068    fn skip_paren_block(&mut self) -> bool {
1069        for (_, c) in self.inner.by_ref() {
1070            if c == ')' {
1071                return true;
1072            }
1073        }
1074        self.emit_error("unclosed comment: missing `)` at the end");
1075        false
1076    }
1077
1078    fn parse_outside_attribute_block(&mut self, start: usize) -> Option<LangStringToken<'a>> {
1079        while let Some((pos, c)) = self.inner.next() {
1080            if c == '"' {
1081                if pos != start {
1082                    self.emit_error("expected ` `, `{` or `,` found `\"`");
1083                    return None;
1084                }
1085                let indices = self.parse_string(pos)?;
1086                if let Some((_, c)) = self.inner.peek().copied()
1087                    && c != '{'
1088                    && !is_separator(c)
1089                    && c != '('
1090                {
1091                    self.emit_error(format!("expected ` `, `{{` or `,` after `\"`, found `{c}`"));
1092                    return None;
1093                }
1094                return Some(LangStringToken::LangToken(&self.data[indices.start..indices.end]));
1095            } else if c == '{' {
1096                self.is_in_attribute_block = true;
1097                return self.next();
1098            } else if is_separator(c) {
1099                if pos != start {
1100                    return Some(LangStringToken::LangToken(&self.data[start..pos]));
1101                }
1102                return self.next();
1103            } else if c == '(' {
1104                if !self.skip_paren_block() {
1105                    return None;
1106                }
1107                if pos != start {
1108                    return Some(LangStringToken::LangToken(&self.data[start..pos]));
1109                }
1110                return self.next();
1111            } else if (pos == start && is_leading_char(c)) || (pos != start && is_bareword_char(c))
1112            {
1113                continue;
1114            } else {
1115                self.emit_error(format!("unexpected character `{c}`"));
1116                return None;
1117            }
1118        }
1119        let token = &self.data[start..];
1120        if token.is_empty() { None } else { Some(LangStringToken::LangToken(&self.data[start..])) }
1121    }
1122}
1123
1124impl<'a> Iterator for TagIterator<'a, '_> {
1125    type Item = LangStringToken<'a>;
1126
1127    fn next(&mut self) -> Option<Self::Item> {
1128        if self.is_error {
1129            return None;
1130        }
1131        let Some(start) = self.skip_separators() else {
1132            if self.is_in_attribute_block {
1133                self.emit_error("unclosed attribute block (`{}`): missing `}` at the end");
1134            }
1135            return None;
1136        };
1137        if self.is_in_attribute_block {
1138            self.parse_in_attribute_block()
1139        } else {
1140            self.parse_outside_attribute_block(start)
1141        }
1142    }
1143}
1144
1145impl Default for LangString {
1146    fn default() -> Self {
1147        Self {
1148            original: String::new(),
1149            should_panic: false,
1150            no_run: false,
1151            ignore: Ignore::None,
1152            rust: true,
1153            test_harness: false,
1154            compile_fail: false,
1155            standalone_crate: false,
1156            error_codes: Vec::new(),
1157            edition: None,
1158            added_classes: Vec::new(),
1159            unknown: Vec::new(),
1160        }
1161    }
1162}
1163
1164impl LangString {
1165    fn parse_without_check(
1166        string: &str,
1167        allow_error_code_check: ErrorCodes,
1168        enable_per_target_ignores: bool,
1169    ) -> Self {
1170        Self::parse(string, allow_error_code_check, enable_per_target_ignores, None)
1171    }
1172
1173    fn parse(
1174        string: &str,
1175        allow_error_code_check: ErrorCodes,
1176        enable_per_target_ignores: bool,
1177        extra: Option<&ExtraInfo<'_>>,
1178    ) -> Self {
1179        let allow_error_code_check = allow_error_code_check.as_bool();
1180        let mut seen_rust_tags = false;
1181        let mut seen_other_tags = false;
1182        let mut seen_custom_tag = false;
1183        let mut data = LangString::default();
1184        let mut ignores = vec![];
1185
1186        data.original = string.to_owned();
1187
1188        let mut call = |tokens: &mut dyn Iterator<Item = LangStringToken<'_>>| {
1189            for token in tokens {
1190                match token {
1191                    LangStringToken::LangToken("should_panic") => {
1192                        data.should_panic = true;
1193                        seen_rust_tags = !seen_other_tags;
1194                    }
1195                    LangStringToken::LangToken("no_run") => {
1196                        data.no_run = true;
1197                        seen_rust_tags = !seen_other_tags;
1198                    }
1199                    LangStringToken::LangToken("ignore") => {
1200                        data.ignore = Ignore::All;
1201                        seen_rust_tags = !seen_other_tags;
1202                    }
1203                    LangStringToken::LangToken(x) if x.starts_with("ignore-") => {
1204                        if enable_per_target_ignores {
1205                            ignores.push(x.trim_start_matches("ignore-").to_owned());
1206                            seen_rust_tags = !seen_other_tags;
1207                        }
1208                    }
1209                    LangStringToken::LangToken("rust") => {
1210                        data.rust = true;
1211                        seen_rust_tags = true;
1212                    }
1213                    LangStringToken::LangToken("custom") => {
1214                        seen_custom_tag = true;
1215                    }
1216                    LangStringToken::LangToken("test_harness") => {
1217                        data.test_harness = true;
1218                        seen_rust_tags = !seen_other_tags || seen_rust_tags;
1219                    }
1220                    LangStringToken::LangToken("compile_fail") => {
1221                        data.compile_fail = true;
1222                        seen_rust_tags = !seen_other_tags || seen_rust_tags;
1223                        data.no_run = true;
1224                    }
1225                    LangStringToken::LangToken("standalone_crate") => {
1226                        data.standalone_crate = true;
1227                        seen_rust_tags = !seen_other_tags || seen_rust_tags;
1228                    }
1229                    LangStringToken::LangToken(x) if x.starts_with("edition") => {
1230                        data.edition = x[7..].parse::<Edition>().ok();
1231                    }
1232                    LangStringToken::LangToken(x)
1233                        if x.starts_with("rust") && x[4..].parse::<Edition>().is_ok() =>
1234                    {
1235                        if let Some(extra) = extra {
1236                            extra.error_invalid_codeblock_attr_with_help(
1237                                format!("unknown attribute `{x}`"),
1238                                |lint| {
1239                                    lint.help(format!(
1240                                        "there is an attribute with a similar name: `edition{}`",
1241                                        &x[4..],
1242                                    ));
1243                                },
1244                            );
1245                        }
1246                    }
1247                    LangStringToken::LangToken(x)
1248                        if allow_error_code_check && x.starts_with('E') && x.len() == 5 =>
1249                    {
1250                        if x[1..].parse::<u32>().is_ok() {
1251                            data.error_codes.push(x.to_owned());
1252                            seen_rust_tags = !seen_other_tags || seen_rust_tags;
1253                        } else {
1254                            seen_other_tags = true;
1255                        }
1256                    }
1257                    LangStringToken::LangToken(x) if extra.is_some() => {
1258                        let s = x.to_lowercase();
1259                        if let Some(help) = match s.as_str() {
1260                            "compile-fail" | "compile_fail" | "compilefail" => Some(
1261                                "use `compile_fail` to invert the results of this test, so that it \
1262                                passes if it cannot be compiled and fails if it can",
1263                            ),
1264                            "should-panic" | "should_panic" | "shouldpanic" => Some(
1265                                "use `should_panic` to invert the results of this test, so that if \
1266                                passes if it panics and fails if it does not",
1267                            ),
1268                            "no-run" | "no_run" | "norun" => Some(
1269                                "use `no_run` to compile, but not run, the code sample during \
1270                                testing",
1271                            ),
1272                            "test-harness" | "test_harness" | "testharness" => Some(
1273                                "use `test_harness` to run functions marked `#[test]` instead of a \
1274                                potentially-implicit `main` function",
1275                            ),
1276                            "standalone" | "standalone_crate" | "standalone-crate" => {
1277                                if let Some(extra) = extra
1278                                    && extra.sp.at_least_rust_2024()
1279                                {
1280                                    Some(
1281                                        "use `standalone_crate` to compile this code block \
1282                                        separately",
1283                                    )
1284                                } else {
1285                                    None
1286                                }
1287                            }
1288                            _ => None,
1289                        } {
1290                            if let Some(extra) = extra {
1291                                extra.error_invalid_codeblock_attr_with_help(
1292                                    format!("unknown attribute `{x}`"),
1293                                    |lint| {
1294                                        lint.help(help).help(
1295                                            "this code block may be skipped during testing, \
1296                                            because unknown attributes are treated as markers for \
1297                                            code samples written in other programming languages, \
1298                                            unless it is also explicitly marked as `rust`",
1299                                        );
1300                                    },
1301                                );
1302                            }
1303                        }
1304                        seen_other_tags = true;
1305                        data.unknown.push(x.to_owned());
1306                    }
1307                    LangStringToken::LangToken(x) => {
1308                        seen_other_tags = true;
1309                        data.unknown.push(x.to_owned());
1310                    }
1311                    LangStringToken::KeyValueAttribute(key, value) => {
1312                        if key == "class" {
1313                            data.added_classes.push(value.to_owned());
1314                        } else if let Some(extra) = extra {
1315                            extra.error_invalid_codeblock_attr(format!(
1316                                "unsupported attribute `{key}`"
1317                            ));
1318                        }
1319                    }
1320                    LangStringToken::ClassAttribute(class) => {
1321                        data.added_classes.push(class.to_owned());
1322                    }
1323                }
1324            }
1325        };
1326
1327        let mut tag_iter = TagIterator::new(string, extra);
1328        call(&mut tag_iter);
1329
1330        // ignore-foo overrides ignore
1331        if !ignores.is_empty() {
1332            data.ignore = Ignore::Some(ignores);
1333        }
1334
1335        data.rust &= !seen_custom_tag && (!seen_other_tags || seen_rust_tags) && !tag_iter.is_error;
1336
1337        data
1338    }
1339}
1340
1341impl<'a> Markdown<'a> {
1342    pub fn into_string(self) -> String {
1343        // This is actually common enough to special-case
1344        if self.content.is_empty() {
1345            return String::new();
1346        }
1347
1348        let mut s = String::with_capacity(self.content.len() * 3 / 2);
1349        html::push_html(&mut s, self.into_iter());
1350
1351        s
1352    }
1353
1354    fn into_iter(self) -> CodeBlocks<'a, 'a, impl Iterator<Item = Event<'a>>> {
1355        let Markdown {
1356            content: md,
1357            links,
1358            ids,
1359            error_codes: codes,
1360            edition,
1361            playground,
1362            heading_offset,
1363        } = self;
1364
1365        let replacer = move |broken_link: BrokenLink<'_>| {
1366            links
1367                .iter()
1368                .find(|link| *link.original_text == *broken_link.reference)
1369                .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1370        };
1371
1372        let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(replacer));
1373        let p = p.into_offset_iter();
1374
1375        ids.handle_footnotes(|ids, existing_footnotes| {
1376            let p = HeadingLinks::new(p, None, ids, heading_offset);
1377            let p = SpannedLinkReplacer::new(p, links);
1378            let p = footnotes::Footnotes::new(p, existing_footnotes);
1379            let p = TableWrapper::new(p.map(|(ev, _)| ev));
1380            CodeBlocks::new(p, codes, edition, playground)
1381        })
1382    }
1383
1384    /// Convert markdown to (summary, remaining) HTML.
1385    ///
1386    /// - The summary is the first top-level Markdown element (usually a paragraph, but potentially
1387    ///   any block).
1388    /// - The remaining docs contain everything after the summary.
1389    pub(crate) fn split_summary_and_content(self) -> (Option<String>, Option<String>) {
1390        if self.content.is_empty() {
1391            return (None, None);
1392        }
1393        let mut p = self.into_iter();
1394
1395        let mut event_level = 0;
1396        let mut summary_events = Vec::new();
1397        let mut get_next_tag = false;
1398
1399        let mut end_of_summary = false;
1400        while let Some(event) = p.next() {
1401            match event {
1402                Event::Start(_) => event_level += 1,
1403                Event::End(kind) => {
1404                    event_level -= 1;
1405                    if event_level == 0 {
1406                        // We're back at the "top" so it means we're done with the summary.
1407                        end_of_summary = true;
1408                        // We surround tables with `<div>` HTML tags so this is a special case.
1409                        get_next_tag = kind == TagEnd::Table;
1410                    }
1411                }
1412                _ => {}
1413            }
1414            summary_events.push(event);
1415            if end_of_summary {
1416                if get_next_tag && let Some(event) = p.next() {
1417                    summary_events.push(event);
1418                }
1419                break;
1420            }
1421        }
1422        let mut summary = String::new();
1423        html::push_html(&mut summary, summary_events.into_iter());
1424        if summary.is_empty() {
1425            return (None, None);
1426        }
1427        let mut content = String::new();
1428        html::push_html(&mut content, p);
1429
1430        if content.is_empty() { (Some(summary), None) } else { (Some(summary), Some(content)) }
1431    }
1432}
1433
1434impl MarkdownWithToc<'_> {
1435    pub(crate) fn into_parts(self) -> (Toc, String) {
1436        let MarkdownWithToc { content: md, links, ids, error_codes: codes, edition, playground } =
1437            self;
1438
1439        // This is actually common enough to special-case
1440        if md.is_empty() {
1441            return (Toc { entries: Vec::new() }, String::new());
1442        }
1443        let mut replacer = |broken_link: BrokenLink<'_>| {
1444            links
1445                .iter()
1446                .find(|link| *link.original_text == *broken_link.reference)
1447                .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1448        };
1449
1450        let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(&mut replacer));
1451        let p = p.into_offset_iter();
1452
1453        let mut s = String::with_capacity(md.len() * 3 / 2);
1454
1455        let mut toc = TocBuilder::new();
1456
1457        ids.handle_footnotes(|ids, existing_footnotes| {
1458            let p = HeadingLinks::new(p, Some(&mut toc), ids, HeadingOffset::H1);
1459            let p = footnotes::Footnotes::new(p, existing_footnotes);
1460            let p = TableWrapper::new(p.map(|(ev, _)| ev));
1461            let p = CodeBlocks::new(p, codes, edition, playground);
1462            html::push_html(&mut s, p);
1463        });
1464
1465        (toc.into_toc(), s)
1466    }
1467    pub(crate) fn into_string(self) -> String {
1468        let (toc, s) = self.into_parts();
1469        format!("<nav id=\"rustdoc\">{toc}</nav>{s}", toc = toc.print())
1470    }
1471}
1472
1473impl MarkdownItemInfo<'_> {
1474    pub(crate) fn into_string(self) -> String {
1475        let MarkdownItemInfo(md, ids) = self;
1476
1477        // This is actually common enough to special-case
1478        if md.is_empty() {
1479            return String::new();
1480        }
1481        let p = Parser::new_ext(md, main_body_opts()).into_offset_iter();
1482
1483        // Treat inline HTML as plain text.
1484        let p = p.map(|event| match event.0 {
1485            Event::Html(text) | Event::InlineHtml(text) => (Event::Text(text), event.1),
1486            _ => event,
1487        });
1488
1489        let mut s = String::with_capacity(md.len() * 3 / 2);
1490
1491        ids.handle_footnotes(|ids, existing_footnotes| {
1492            let p = HeadingLinks::new(p, None, ids, HeadingOffset::H1);
1493            let p = footnotes::Footnotes::new(p, existing_footnotes);
1494            let p = TableWrapper::new(p.map(|(ev, _)| ev));
1495            let p = p.filter(|event| {
1496                !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
1497            });
1498            html::push_html(&mut s, p);
1499        });
1500
1501        s
1502    }
1503}
1504
1505impl MarkdownSummaryLine<'_> {
1506    pub(crate) fn into_string_with_has_more_content(self) -> (String, bool) {
1507        let MarkdownSummaryLine(md, links) = self;
1508        // This is actually common enough to special-case
1509        if md.is_empty() {
1510            return (String::new(), false);
1511        }
1512
1513        let mut replacer = |broken_link: BrokenLink<'_>| {
1514            links
1515                .iter()
1516                .find(|link| *link.original_text == *broken_link.reference)
1517                .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1518        };
1519
1520        let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer))
1521            .peekable();
1522        let mut summary = SummaryLine::new(p);
1523
1524        let mut s = String::new();
1525
1526        let without_paragraphs = LinkReplacer::new(&mut summary, links).filter(|event| {
1527            !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
1528        });
1529
1530        html::push_html(&mut s, without_paragraphs);
1531
1532        let has_more_content =
1533            matches!(summary.inner.peek(), Some(Event::Start(_))) || summary.skipped_tags > 0;
1534
1535        (s, has_more_content)
1536    }
1537
1538    pub(crate) fn into_string(self) -> String {
1539        self.into_string_with_has_more_content().0
1540    }
1541}
1542
1543/// Renders a subset of Markdown in the first paragraph of the provided Markdown.
1544///
1545/// - *Italics*, **bold**, and `inline code` styles **are** rendered.
1546/// - Headings and links are stripped (though the text *is* rendered).
1547/// - HTML, code blocks, and everything else are ignored.
1548///
1549/// Returns a tuple of the rendered HTML string and whether the output was shortened
1550/// due to the provided `length_limit`.
1551fn markdown_summary_with_limit(
1552    md: &str,
1553    link_names: &[RenderedLink],
1554    length_limit: usize,
1555) -> (String, bool) {
1556    if md.is_empty() {
1557        return (String::new(), false);
1558    }
1559
1560    let mut replacer = |broken_link: BrokenLink<'_>| {
1561        link_names
1562            .iter()
1563            .find(|link| *link.original_text == *broken_link.reference)
1564            .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1565    };
1566
1567    let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer));
1568    let mut p = LinkReplacer::new(p, link_names);
1569
1570    let mut buf = HtmlWithLimit::new(length_limit);
1571    let mut stopped_early = false;
1572    p.try_for_each(|event| {
1573        match &event {
1574            Event::Text(text) => {
1575                let r =
1576                    text.split_inclusive(char::is_whitespace).try_for_each(|word| buf.push(word));
1577                if r.is_break() {
1578                    stopped_early = true;
1579                }
1580                return r;
1581            }
1582            Event::Code(code) => {
1583                buf.open_tag("code");
1584                let r = buf.push(code);
1585                if r.is_break() {
1586                    stopped_early = true;
1587                } else {
1588                    buf.close_tag();
1589                }
1590                return r;
1591            }
1592            Event::Start(tag) => match tag {
1593                Tag::Emphasis => buf.open_tag("em"),
1594                Tag::Strong => buf.open_tag("strong"),
1595                Tag::CodeBlock(..) => return ControlFlow::Break(()),
1596                _ => {}
1597            },
1598            Event::End(tag) => match tag {
1599                TagEnd::Emphasis | TagEnd::Strong => buf.close_tag(),
1600                TagEnd::Paragraph | TagEnd::Heading(_) => return ControlFlow::Break(()),
1601                _ => {}
1602            },
1603            Event::HardBreak | Event::SoftBreak => buf.push(" ")?,
1604            _ => {}
1605        };
1606        ControlFlow::Continue(())
1607    });
1608
1609    (buf.finish(), stopped_early)
1610}
1611
1612/// Renders a shortened first paragraph of the given Markdown as a subset of Markdown,
1613/// making it suitable for contexts like the search index.
1614///
1615/// Will shorten to 59 or 60 characters, including an ellipsis (…) if it was shortened.
1616///
1617/// See [`markdown_summary_with_limit`] for details about what is rendered and what is not.
1618pub(crate) fn short_markdown_summary(markdown: &str, link_names: &[RenderedLink]) -> String {
1619    let (mut s, was_shortened) = markdown_summary_with_limit(markdown, link_names, 59);
1620
1621    if was_shortened {
1622        s.push('…');
1623    }
1624
1625    s
1626}
1627
1628/// Renders the first paragraph of the provided markdown as plain text.
1629/// Useful for alt-text.
1630///
1631/// - Headings, links, and formatting are stripped.
1632/// - Inline code is rendered as-is, surrounded by backticks.
1633/// - HTML and code blocks are ignored.
1634pub(crate) fn plain_text_summary(md: &str, link_names: &[RenderedLink]) -> String {
1635    if md.is_empty() {
1636        return String::new();
1637    }
1638
1639    let mut s = String::with_capacity(md.len() * 3 / 2);
1640
1641    let mut replacer = |broken_link: BrokenLink<'_>| {
1642        link_names
1643            .iter()
1644            .find(|link| *link.original_text == *broken_link.reference)
1645            .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1646    };
1647
1648    let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer));
1649
1650    plain_text_from_events(p, &mut s);
1651
1652    s
1653}
1654
1655pub(crate) fn plain_text_from_events<'a>(
1656    events: impl Iterator<Item = pulldown_cmark::Event<'a>>,
1657    s: &mut String,
1658) {
1659    for event in events {
1660        match &event {
1661            Event::Text(text) => s.push_str(text),
1662            Event::Code(code) => {
1663                s.push('`');
1664                s.push_str(code);
1665                s.push('`');
1666            }
1667            Event::HardBreak | Event::SoftBreak => s.push(' '),
1668            Event::Start(Tag::CodeBlock(..)) => break,
1669            Event::End(TagEnd::Paragraph) => break,
1670            Event::End(TagEnd::Heading(..)) => break,
1671            _ => (),
1672        }
1673    }
1674}
1675
1676pub(crate) fn html_text_from_events<'a>(
1677    events: impl Iterator<Item = pulldown_cmark::Event<'a>>,
1678    s: &mut String,
1679) {
1680    for event in events {
1681        match &event {
1682            Event::Text(text) => {
1683                write!(s, "{}", EscapeBodyText(text)).expect("string alloc infallible")
1684            }
1685            Event::Code(code) => {
1686                s.push_str("<code>");
1687                write!(s, "{}", EscapeBodyText(code)).expect("string alloc infallible");
1688                s.push_str("</code>");
1689            }
1690            Event::HardBreak | Event::SoftBreak => s.push(' '),
1691            Event::Start(Tag::CodeBlock(..)) => break,
1692            Event::End(TagEnd::Paragraph) => break,
1693            Event::End(TagEnd::Heading(..)) => break,
1694            _ => (),
1695        }
1696    }
1697}
1698
1699#[derive(Debug)]
1700pub(crate) struct MarkdownLink {
1701    pub kind: LinkType,
1702    pub link: String,
1703    pub range: MarkdownLinkRange,
1704}
1705
/// The byte range in the Markdown source that a link diagnostic should point at.
#[derive(Clone, Debug)]
pub(crate) enum MarkdownLinkRange {
    /// The range of the link destination only. This is what warnings normally point at.
    Destination(Range<usize>),
    /// The range of the whole link, for the cases where the destination cannot be pinpointed
    /// (usually because backslash escapes are involved). No structured suggestions are
    /// provided for such links.
    WholeLink(Range<usize>),
}

impl MarkdownLinkRange {
    /// Returns the wrapped range, whichever variant holds it.
    pub fn inner_range(&self) -> &Range<usize> {
        let (Self::Destination(range) | Self::WholeLink(range)) = self;
        range
    }
}
1725
1726pub(crate) fn markdown_links<'md, R>(
1727    md: &'md str,
1728    preprocess_link: impl Fn(MarkdownLink) -> Option<R>,
1729) -> Vec<R> {
1730    if md.is_empty() {
1731        return vec![];
1732    }
1733
1734    // FIXME: remove this function once pulldown_cmark can provide spans for link definitions.
1735    let locate = |s: &str, fallback: Range<usize>| unsafe {
1736        let s_start = s.as_ptr();
1737        let s_end = s_start.add(s.len());
1738        let md_start = md.as_ptr();
1739        let md_end = md_start.add(md.len());
1740        if md_start <= s_start && s_end <= md_end {
1741            let start = s_start.offset_from(md_start) as usize;
1742            let end = s_end.offset_from(md_start) as usize;
1743            MarkdownLinkRange::Destination(start..end)
1744        } else {
1745            MarkdownLinkRange::WholeLink(fallback)
1746        }
1747    };
1748
1749    let span_for_link = |link: &CowStr<'_>, span: Range<usize>| {
1750        // For diagnostics, we want to underline the link's definition but `span` will point at
1751        // where the link is used. This is a problem for reference-style links, where the definition
1752        // is separate from the usage.
1753
1754        match link {
1755            // `Borrowed` variant means the string (the link's destination) may come directly from
1756            // the markdown text and we can locate the original link destination.
1757            // NOTE: LinkReplacer also provides `Borrowed` but possibly from other sources,
1758            // so `locate()` can fall back to use `span`.
1759            CowStr::Borrowed(s) => locate(s, span),
1760
1761            // For anything else, we can only use the provided range.
1762            CowStr::Boxed(_) | CowStr::Inlined(_) => MarkdownLinkRange::WholeLink(span),
1763        }
1764    };
1765
1766    let span_for_offset_backward = |span: Range<usize>, open: u8, close: u8| {
1767        let mut open_brace = !0;
1768        let mut close_brace = !0;
1769        for (i, b) in md.as_bytes()[span.clone()].iter().copied().enumerate().rev() {
1770            let i = i + span.start;
1771            if b == close {
1772                close_brace = i;
1773                break;
1774            }
1775        }
1776        if close_brace < span.start || close_brace >= span.end {
1777            return MarkdownLinkRange::WholeLink(span);
1778        }
1779        let mut nesting = 1;
1780        for (i, b) in md.as_bytes()[span.start..close_brace].iter().copied().enumerate().rev() {
1781            let i = i + span.start;
1782            if b == close {
1783                nesting += 1;
1784            }
1785            if b == open {
1786                nesting -= 1;
1787            }
1788            if nesting == 0 {
1789                open_brace = i;
1790                break;
1791            }
1792        }
1793        assert!(open_brace != close_brace);
1794        if open_brace < span.start || open_brace >= span.end {
1795            return MarkdownLinkRange::WholeLink(span);
1796        }
1797        // do not actually include braces in the span
1798        let range = (open_brace + 1)..close_brace;
1799        MarkdownLinkRange::Destination(range)
1800    };
1801
1802    let span_for_offset_forward = |span: Range<usize>, open: u8, close: u8| {
1803        let mut open_brace = !0;
1804        let mut close_brace = !0;
1805        for (i, b) in md.as_bytes()[span.clone()].iter().copied().enumerate() {
1806            let i = i + span.start;
1807            if b == open {
1808                open_brace = i;
1809                break;
1810            }
1811        }
1812        if open_brace < span.start || open_brace >= span.end {
1813            return MarkdownLinkRange::WholeLink(span);
1814        }
1815        let mut nesting = 0;
1816        for (i, b) in md.as_bytes()[open_brace..span.end].iter().copied().enumerate() {
1817            let i = i + open_brace;
1818            if b == close {
1819                nesting -= 1;
1820            }
1821            if b == open {
1822                nesting += 1;
1823            }
1824            if nesting == 0 {
1825                close_brace = i;
1826                break;
1827            }
1828        }
1829        assert!(open_brace != close_brace);
1830        if open_brace < span.start || open_brace >= span.end {
1831            return MarkdownLinkRange::WholeLink(span);
1832        }
1833        // do not actually include braces in the span
1834        let range = (open_brace + 1)..close_brace;
1835        MarkdownLinkRange::Destination(range)
1836    };
1837
1838    let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
1839    let event_iter = Parser::new_with_broken_link_callback(
1840        md,
1841        main_body_opts(),
1842        Some(&mut broken_link_callback),
1843    )
1844    .into_offset_iter();
1845    let mut links = Vec::new();
1846
1847    for (event, span) in event_iter {
1848        match event {
1849            Event::Start(Tag::Link { link_type, dest_url, .. }) if may_be_doc_link(link_type) => {
1850                let range = match link_type {
1851                    // Link is pulled from the link itself.
1852                    LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => {
1853                        span_for_offset_backward(span, b'[', b']')
1854                    }
1855                    LinkType::CollapsedUnknown => span_for_offset_forward(span, b'[', b']'),
1856                    LinkType::Inline => span_for_offset_backward(span, b'(', b')'),
1857                    // Link is pulled from elsewhere in the document.
1858                    LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => {
1859                        span_for_link(&dest_url, span)
1860                    }
1861                    LinkType::Autolink | LinkType::Email => unreachable!(),
1862                };
1863
1864                if let Some(link) = preprocess_link(MarkdownLink {
1865                    kind: link_type,
1866                    link: dest_url.into_string(),
1867                    range,
1868                }) {
1869                    links.push(link);
1870                }
1871            }
1872            _ => {}
1873        }
1874    }
1875
1876    links
1877}
1878
1879#[derive(Debug)]
1880pub(crate) struct RustCodeBlock {
1881    /// The range in the markdown that the code block occupies. Note that this includes the fences
1882    /// for fenced code blocks.
1883    pub(crate) range: Range<usize>,
1884    /// The range in the markdown that the code within the code block occupies.
1885    pub(crate) code: Range<usize>,
1886    pub(crate) is_fenced: bool,
1887    pub(crate) lang_string: LangString,
1888}
1889
1890/// Returns a range of bytes for each code block in the markdown that is tagged as `rust` or
1891/// untagged (and assumed to be rust).
1892pub(crate) fn rust_code_blocks(md: &str, extra_info: &ExtraInfo<'_>) -> Vec<RustCodeBlock> {
1893    let mut code_blocks = vec![];
1894
1895    if md.is_empty() {
1896        return code_blocks;
1897    }
1898
1899    let mut p = Parser::new_ext(md, main_body_opts()).into_offset_iter();
1900
1901    while let Some((event, offset)) = p.next() {
1902        if let Event::Start(Tag::CodeBlock(syntax)) = event {
1903            let (lang_string, code_start, code_end, range, is_fenced) = match syntax {
1904                CodeBlockKind::Fenced(syntax) => {
1905                    let syntax = syntax.as_ref();
1906                    let lang_string = if syntax.is_empty() {
1907                        Default::default()
1908                    } else {
1909                        LangString::parse(syntax, ErrorCodes::Yes, false, Some(extra_info))
1910                    };
1911                    if !lang_string.rust {
1912                        continue;
1913                    }
1914                    let (code_start, mut code_end) = match p.next() {
1915                        Some((Event::Text(_), offset)) => (offset.start, offset.end),
1916                        Some((_, sub_offset)) => {
1917                            let code = Range { start: sub_offset.start, end: sub_offset.start };
1918                            code_blocks.push(RustCodeBlock {
1919                                is_fenced: true,
1920                                range: offset,
1921                                code,
1922                                lang_string,
1923                            });
1924                            continue;
1925                        }
1926                        None => {
1927                            let code = Range { start: offset.end, end: offset.end };
1928                            code_blocks.push(RustCodeBlock {
1929                                is_fenced: true,
1930                                range: offset,
1931                                code,
1932                                lang_string,
1933                            });
1934                            continue;
1935                        }
1936                    };
1937                    while let Some((Event::Text(_), offset)) = p.next() {
1938                        code_end = offset.end;
1939                    }
1940                    (lang_string, code_start, code_end, offset, true)
1941                }
1942                CodeBlockKind::Indented => {
1943                    // The ending of the offset goes too far sometime so we reduce it by one in
1944                    // these cases.
1945                    if offset.end > offset.start && md.get(offset.end..=offset.end) == Some("\n") {
1946                        (
1947                            LangString::default(),
1948                            offset.start,
1949                            offset.end,
1950                            Range { start: offset.start, end: offset.end - 1 },
1951                            false,
1952                        )
1953                    } else {
1954                        (LangString::default(), offset.start, offset.end, offset, false)
1955                    }
1956                }
1957            };
1958
1959            code_blocks.push(RustCodeBlock {
1960                is_fenced,
1961                range,
1962                code: Range { start: code_start, end: code_end },
1963                lang_string,
1964            });
1965        }
1966    }
1967
1968    code_blocks
1969}
1970
1971#[derive(Clone, Default, Debug)]
1972pub struct IdMap {
1973    map: FxHashMap<String, usize>,
1974    existing_footnotes: Arc<AtomicUsize>,
1975}
1976
/// Returns `true` if `id` is one of the ids that rustdoc's own JavaScript, HTML or page
/// sections already use. The comparison is exact and case-sensitive.
fn is_default_id(id: &str) -> bool {
    // This is the list of IDs used in JavaScript.
    const USED_IN_JS: &[&str] = &[
        "help",
        "settings",
        "not-displayed",
        "alternative-display",
        "search",
        "crate-search",
        "crate-search-div",
    ];
    // This is the list of IDs used in HTML generated in Rust (including the ones
    // used in tera template files).
    const USED_IN_HTML: &[&str] = &[
        "themeStyle",
        "settings-menu",
        "help-button",
        "sidebar-button",
        "main-content",
        "toggle-all-docs",
        "all-types",
        "default-settings",
        "sidebar-vars",
        "copy-path",
        "rustdoc-toc",
        "rustdoc-modnav",
    ];
    // This is the list of IDs used by rustdoc sections (but still generated by
    // rustdoc).
    const USED_BY_SECTIONS: &[&str] = &[
        "fields",
        "variants",
        "implementors-list",
        "synthetic-implementors-list",
        "foreign-impls",
        "implementations",
        "trait-implementations",
        "synthetic-implementations",
        "blanket-implementations",
        "required-associated-types",
        "provided-associated-types",
        "provided-associated-consts",
        "required-associated-consts",
        "required-methods",
        "provided-methods",
        "dyn-compatibility",
        "implementors",
        "synthetic-implementors",
        "implementations-list",
        "trait-implementations-list",
        "synthetic-implementations-list",
        "blanket-implementations-list",
        "deref-methods",
        "layout",
        "aliased-type",
    ];

    [USED_IN_JS, USED_IN_HTML, USED_BY_SECTIONS].iter().any(|group| group.contains(&id))
}
2031
2032impl IdMap {
2033    pub fn new() -> Self {
2034        IdMap { map: FxHashMap::default(), existing_footnotes: Arc::new(AtomicUsize::new(0)) }
2035    }
2036
2037    pub(crate) fn derive<S: AsRef<str> + ToString>(&mut self, candidate: S) -> String {
2038        let id = match self.map.get_mut(candidate.as_ref()) {
2039            None => {
2040                let candidate = candidate.to_string();
2041                if is_default_id(&candidate) {
2042                    let id = format!("{}-{}", candidate, 1);
2043                    self.map.insert(candidate, 2);
2044                    id
2045                } else {
2046                    candidate
2047                }
2048            }
2049            Some(a) => {
2050                let id = format!("{}-{}", candidate.as_ref(), *a);
2051                *a += 1;
2052                id
2053            }
2054        };
2055
2056        self.map.insert(id.clone(), 1);
2057        id
2058    }
2059
2060    /// Method to handle `existing_footnotes` increment automatically (to prevent forgetting
2061    /// about it).
2062    pub(crate) fn handle_footnotes<'a, T, F: FnOnce(&'a mut Self, Weak<AtomicUsize>) -> T>(
2063        &'a mut self,
2064        closure: F,
2065    ) -> T {
2066        let existing_footnotes = Arc::downgrade(&self.existing_footnotes);
2067
2068        closure(self, existing_footnotes)
2069    }
2070
2071    pub(crate) fn clear(&mut self) {
2072        self.map.clear();
2073        self.existing_footnotes = Arc::new(AtomicUsize::new(0));
2074    }
2075}