1use std::borrow::Cow;
30use std::collections::VecDeque;
31use std::fmt::{self, Write};
32use std::iter::Peekable;
33use std::ops::{ControlFlow, Range};
34use std::path::PathBuf;
35use std::str::{self, CharIndices};
36use std::sync::atomic::AtomicUsize;
37use std::sync::{Arc, Weak};
38
39use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
40use rustc_errors::{Diag, DiagMessage};
41use rustc_hir::def_id::LocalDefId;
42use rustc_middle::ty::TyCtxt;
43pub(crate) use rustc_resolve::rustdoc::main_body_opts;
44use rustc_resolve::rustdoc::may_be_doc_link;
45use rustc_resolve::rustdoc::pulldown_cmark::{
46 self, BrokenLink, CodeBlockKind, CowStr, Event, LinkType, Options, Parser, Tag, TagEnd, html,
47};
48use rustc_span::edition::Edition;
49use rustc_span::{Span, Symbol};
50use tracing::{debug, trace};
51
52use crate::clean::RenderedLink;
53use crate::doctest;
54use crate::doctest::GlobalTestOptions;
55use crate::html::escape::{Escape, EscapeBodyText};
56use crate::html::highlight;
57use crate::html::length_limit::HtmlWithLimit;
58use crate::html::render::small_url_encode;
59use crate::html::toc::{Toc, TocBuilder};
60
61mod footnotes;
62#[cfg(test)]
63mod tests;
64
/// Highest `<hN>` level that heading rendering will emit; the heading level plus the
/// configured heading offset is clamped to this value.
const MAX_HEADER_LEVEL: u32 = 6;
66
67pub(crate) fn summary_opts() -> Options {
69 Options::ENABLE_TABLES
70 | Options::ENABLE_FOOTNOTES
71 | Options::ENABLE_STRIKETHROUGH
72 | Options::ENABLE_TASKLISTS
73 | Options::ENABLE_SMART_PUNCTUATION
74}
75
/// Amount added to a Markdown heading's level before it is rendered.
///
/// The numeric value of a variant is the shift itself: `H1` leaves levels untouched,
/// `H2` pushes every heading one level down, and so on.
#[derive(Debug, Clone, Copy)]
pub enum HeadingOffset {
    H1 = 0,
    H2 = 1,
    H3 = 2,
    H4 = 3,
    H5 = 4,
    H6 = 5,
}
85
/// A Markdown document bundled with everything needed to render it to HTML.
pub struct Markdown<'a> {
    /// The Markdown source text.
    pub content: &'a str,
    /// Resolved links; used to rewrite link destinations and titles while rendering.
    pub links: &'a [RenderedLink],
    /// Map used to derive unique ids for the headings emitted by this document.
    pub ids: &'a mut IdMap,
    /// Whether `E####` tokens in code-block language strings are recognized as error codes.
    pub error_codes: ErrorCodes,
    /// Edition applied to code blocks that do not name one explicitly.
    pub edition: Edition,
    /// Playground configuration; when present with a non-empty URL, code blocks get a
    /// "Run code" link.
    pub playground: &'a Option<Playground>,
    /// Offset added to every heading level (the result is clamped to `MAX_HEADER_LEVEL`).
    pub heading_offset: HeadingOffset,
}
/// Markdown input carrying the same rendering context as [`Markdown`], minus the heading
/// offset.
// NOTE(review): presumably rendered together with a table of contents, per the type
// name; the rendering code is outside this view — confirm.
pub(crate) struct MarkdownWithToc<'a> {
    /// The Markdown source text.
    pub(crate) content: &'a str,
    /// Resolved links available to the renderer.
    pub(crate) links: &'a [RenderedLink],
    /// Id map shared with the renderer.
    pub(crate) ids: &'a mut IdMap,
    /// Error-code handling switch for code-block language strings.
    pub(crate) error_codes: ErrorCodes,
    /// Edition associated with the document.
    pub(crate) edition: Edition,
    /// Optional playground configuration.
    pub(crate) playground: &'a Option<Playground>,
}
/// Markdown input that carries only text, links and an id map.
// NOTE(review): presumably used for short item-info snippets, per the type name; the
// rendering code is outside this view — confirm.
pub(crate) struct MarkdownItemInfo<'a> {
    /// The Markdown source text.
    pub(crate) content: &'a str,
    /// Resolved links available to the renderer.
    pub(crate) links: &'a [RenderedLink],
    /// Id map shared with the renderer.
    pub(crate) ids: &'a mut IdMap,
}
/// Pair of Markdown source text (field `0`) and its resolved links (field `1`).
// NOTE(review): presumably rendered as a one-line summary, per the type name — confirm.
pub(crate) struct MarkdownSummaryLine<'a>(pub &'a str, pub &'a [RenderedLink]);
121
/// Two-state switch deciding whether `E####` tokens in a code-block language string are
/// interpreted as compiler error codes.
#[derive(Copy, Clone, PartialEq, Debug)]
pub enum ErrorCodes {
    Yes,
    No,
}

impl ErrorCodes {
    /// Maps `true` to [`ErrorCodes::Yes`] and `false` to [`ErrorCodes::No`].
    pub(crate) fn from(b: bool) -> Self {
        if b { ErrorCodes::Yes } else { ErrorCodes::No }
    }

    /// Inverse of [`ErrorCodes::from`]: `Yes` becomes `true`, `No` becomes `false`.
    pub(crate) fn as_bool(self) -> bool {
        matches!(self, ErrorCodes::Yes)
    }
}
143
/// One line of a code example, classified by whether it appears in rendered output.
pub(crate) enum Line<'a> {
    /// A line that is part of the code but omitted from the rendered HTML.
    Hidden(&'a str),
    /// A line that is displayed (possibly rewritten, hence the `Cow`).
    Shown(Cow<'a, str>),
}

impl<'a> Line<'a> {
    /// Text to display in HTML, or `None` for a hidden line.
    fn for_html(self) -> Option<Cow<'a, str>> {
        if let Line::Shown(text) = self { Some(text) } else { None }
    }

    /// Text to use when assembling the actual code; hidden lines are included.
    pub(crate) fn for_code(self) -> Cow<'a, str> {
        match self {
            Line::Hidden(text) => Cow::Borrowed(text),
            Line::Shown(text) => text,
        }
    }
}
167
168pub(crate) fn map_line(s: &str) -> Line<'_> {
176 let trimmed = s.trim();
177 if trimmed.starts_with("##") {
178 Line::Shown(Cow::Owned(s.replacen("##", "#", 1)))
179 } else if let Some(stripped) = trimmed.strip_prefix("# ") {
180 Line::Hidden(stripped)
182 } else if trimmed == "#" {
183 Line::Hidden("")
185 } else {
186 Line::Shown(Cow::Borrowed(s))
187 }
188}
189
/// Maps a character of heading text to the character used in the heading's id.
///
/// Alphanumerics, `-` and `_` are kept (ASCII letters lower-cased), ASCII whitespace
/// becomes `-`, and every other character is dropped (`None`).
fn slugify(c: char) -> Option<char> {
    match c {
        '-' | '_' => Some(c),
        // `to_ascii_lowercase` leaves non-ASCII characters untouched.
        _ if c.is_alphanumeric() => Some(c.to_ascii_lowercase()),
        _ if c.is_ascii() && c.is_whitespace() => Some('-'),
        _ => None,
    }
}
202
/// Configuration for the "Run code" links attached to rendered code examples.
#[derive(Debug)]
pub struct Playground {
    /// Crate name handed to the doctest builder when generating the playground code.
    pub crate_name: Option<Symbol>,
    /// Base URL of the playground; an empty URL disables the link.
    pub url: String,
}
208
209struct CodeBlocks<'p, 'a, I: Iterator<Item = Event<'a>>> {
211 inner: I,
212 check_error_codes: ErrorCodes,
213 edition: Edition,
214 playground: &'p Option<Playground>,
217}
218
219impl<'p, 'a, I: Iterator<Item = Event<'a>>> CodeBlocks<'p, 'a, I> {
220 fn new(
221 iter: I,
222 error_codes: ErrorCodes,
223 edition: Edition,
224 playground: &'p Option<Playground>,
225 ) -> Self {
226 CodeBlocks { inner: iter, check_error_codes: error_codes, edition, playground }
227 }
228}
229
/// Passes every event through unchanged except code blocks, which are swallowed whole and
/// re-emitted as one `Event::Html`.
impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> {
    type Item = Event<'a>;

    fn next(&mut self) -> Option<Self::Item> {
        let event = self.inner.next();
        // Anything that is not the start of a code block (including end of stream) is
        // forwarded as-is.
        let Some(Event::Start(Tag::CodeBlock(kind))) = event else {
            return event;
        };

        // Collect the block's text up to its end tag; non-text events inside are dropped.
        let mut original_text = String::new();
        for event in &mut self.inner {
            match event {
                Event::End(TagEnd::CodeBlock) => break,
                Event::Text(ref s) => {
                    original_text.push_str(s);
                }
                _ => {}
            }
        }

        let LangString { added_classes, compile_fail, should_panic, ignore, edition, .. } =
            match kind {
                CodeBlockKind::Fenced(ref lang) => {
                    let parse_result =
                        LangString::parse_without_check(lang, self.check_error_codes);
                    if !parse_result.rust {
                        // Non-Rust block: emit the escaped text without highlighting,
                        // tagging it with the first unknown token as `language-…`.
                        let added_classes = parse_result.added_classes;
                        let lang_string = if let Some(lang) = parse_result.unknown.first() {
                            format!("language-{lang}")
                        } else {
                            String::new()
                        };
                        let whitespace = if added_classes.is_empty() { "" } else { " " };
                        return Some(Event::Html(
                            format!(
                                "<div class=\"example-wrap\">\
                                 <pre class=\"{lang_string}{whitespace}{added_classes}\">\
                                     <code>{text}</code>\
                                 </pre>\
                             </div>",
                                added_classes = added_classes.join(" "),
                                text = Escape(original_text.trim_suffix('\n')),
                            )
                            .into(),
                        ));
                    }
                    parse_result
                }
                // Indented blocks carry no language string; use the defaults.
                CodeBlockKind::Indented => Default::default(),
            };

        // Displayed text: hidden lines are filtered out.
        let lines = original_text.lines().filter_map(|l| map_line(l).for_html());
        let text = lines.intersperse("\n".into()).collect::<String>();

        let explicit_edition = edition.is_some();
        let edition = edition.unwrap_or(self.edition);

        let playground_button = self.playground.as_ref().and_then(|playground| {
            let krate = &playground.crate_name;
            let url = &playground.url;
            if url.is_empty() {
                return None;
            }
            // Code sent to the playground keeps the hidden lines.
            let test = original_text
                .lines()
                .map(|l| map_line(l).for_code())
                .intersperse("\n".into())
                .collect::<String>();
            let krate = krate.as_ref().map(|s| s.as_str());

            let opts = GlobalTestOptions {
                crate_name: krate.map(String::from).unwrap_or_default(),
                no_crate_inject: false,
                insert_indent_space: true,
                args_file: PathBuf::new(),
            };
            let mut builder = doctest::BuildDocTestBuilder::new(&test).edition(edition);
            if let Some(krate) = krate {
                builder = builder.crate_name(krate);
            }
            let doctest = builder.build(None);
            let (wrapped, _) = doctest.generate_unique_doctest(&test, false, &opts, krate);
            let test = wrapped.to_string();
            // Code that enables a feature gate is pointed at the nightly channel.
            let channel = if test.contains("#![feature(") { "&version=nightly" } else { "" };

            let test_escaped = small_url_encode(test);
            Some(format!(
                "<a class=\"test-arrow\" \
                    target=\"_blank\" \
                    title=\"Run code\" \
                    href=\"{url}?code={test_escaped}{channel}&edition={edition}\"></a>",
            ))
        });

        // At most one tooltip is shown; the order of the checks below is the priority.
        let tooltip = {
            use highlight::Tooltip::*;

            if ignore == Ignore::All {
                Some(IgnoreAll)
            } else if let Ignore::Some(platforms) = ignore {
                Some(IgnoreSome(platforms))
            } else if compile_fail {
                Some(CompileFail)
            } else if should_panic {
                Some(ShouldPanic)
            } else if explicit_edition {
                Some(Edition(edition))
            } else {
                None
            }
        };

        let s = format!(
            "\n{}",
            highlight::render_example_with_highlighting(
                &text,
                tooltip.as_ref(),
                playground_button.as_deref(),
                &added_classes,
            )
        );
        Some(Event::Html(s.into()))
    }
}
358
/// State shared by [`LinkReplacer`] and [`SpannedLinkReplacer`].
struct LinkReplacerInner<'a> {
    /// Resolved links that events are matched against.
    links: &'a [RenderedLink],
    /// The resolved link for the shortcut/collapsed link currently being traversed, if any.
    shortcut_link: Option<&'a RenderedLink>,
}
364
365struct LinkReplacer<'a, I: Iterator<Item = Event<'a>>> {
366 iter: I,
367 inner: LinkReplacerInner<'a>,
368}
369
370impl<'a, I: Iterator<Item = Event<'a>>> LinkReplacer<'a, I> {
371 fn new(iter: I, links: &'a [RenderedLink]) -> Self {
372 LinkReplacer { iter, inner: { LinkReplacerInner { links, shortcut_link: None } } }
373 }
374}
375
376struct SpannedLinkReplacer<'a, I: Iterator<Item = SpannedEvent<'a>>> {
379 iter: I,
380 inner: LinkReplacerInner<'a>,
381}
382
383impl<'a, I: Iterator<Item = SpannedEvent<'a>>> SpannedLinkReplacer<'a, I> {
384 fn new(iter: I, links: &'a [RenderedLink]) -> Self {
385 SpannedLinkReplacer { iter, inner: { LinkReplacerInner { links, shortcut_link: None } } }
386 }
387}
388
389impl<'a> LinkReplacerInner<'a> {
390 fn handle_event(&mut self, event: &mut Event<'a>) {
391 match event {
393 Event::Start(Tag::Link {
396 link_type: LinkType::ShortcutUnknown | LinkType::CollapsedUnknown,
398 dest_url,
399 title,
400 ..
401 }) => {
402 debug!("saw start of shortcut link to {dest_url} with title {title}");
403 let link = self.links.iter().find(|&link| *link.href == **dest_url);
406 if let Some(link) = link {
409 trace!("it matched");
410 assert!(self.shortcut_link.is_none(), "shortcut links cannot be nested");
411 self.shortcut_link = Some(link);
412 if title.is_empty() && !link.tooltip.is_empty() {
413 *title = CowStr::Borrowed(link.tooltip.as_ref());
414 }
415 }
416 }
417 Event::End(TagEnd::Link) if self.shortcut_link.is_some() => {
419 debug!("saw end of shortcut link");
420 self.shortcut_link = None;
421 }
422 Event::Code(text) => {
425 trace!("saw code {text}");
426 if let Some(link) = self.shortcut_link {
427 if let Some(link) = self.links.iter().find(|l| {
437 l.href == link.href
438 && Some(&**text) == l.original_text.get(1..l.original_text.len() - 1)
439 }) {
440 debug!("replacing {text} with {new_text}", new_text = link.new_text);
441 *text = CowStr::Borrowed(&link.new_text);
442 }
443 }
444 }
445 Event::Text(text) => {
448 trace!("saw text {text}");
449 if let Some(link) = self.shortcut_link {
450 if let Some(link) = self
452 .links
453 .iter()
454 .find(|l| l.href == link.href && **text == *l.original_text)
455 {
456 debug!("replacing {text} with {new_text}", new_text = link.new_text);
457 *text = CowStr::Borrowed(&link.new_text);
458 }
459 }
460 }
461 Event::Start(Tag::Link { dest_url, title, .. }) => {
464 if let Some(link) =
465 self.links.iter().find(|&link| *link.original_text == **dest_url)
466 {
467 *dest_url = CowStr::Borrowed(link.href.as_ref());
468 if title.is_empty() && !link.tooltip.is_empty() {
469 *title = CowStr::Borrowed(link.tooltip.as_ref());
470 }
471 }
472 }
473 _ => {}
475 }
476 }
477}
478
479impl<'a, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, I> {
480 type Item = Event<'a>;
481
482 fn next(&mut self) -> Option<Self::Item> {
483 let mut event = self.iter.next();
484 if let Some(ref mut event) = event {
485 self.inner.handle_event(event);
486 }
487 event
489 }
490}
491
492impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for SpannedLinkReplacer<'a, I> {
493 type Item = SpannedEvent<'a>;
494
495 fn next(&mut self) -> Option<Self::Item> {
496 let (mut event, range) = self.iter.next()?;
497 self.inner.handle_event(&mut event);
498 Some((event, range))
500 }
501}
502
503struct TableWrapper<'a, I: Iterator<Item = Event<'a>>> {
505 inner: I,
506 stored_events: VecDeque<Event<'a>>,
507}
508
509impl<'a, I: Iterator<Item = Event<'a>>> TableWrapper<'a, I> {
510 fn new(iter: I) -> Self {
511 Self { inner: iter, stored_events: VecDeque::new() }
512 }
513}
514
515impl<'a, I: Iterator<Item = Event<'a>>> Iterator for TableWrapper<'a, I> {
516 type Item = Event<'a>;
517
518 fn next(&mut self) -> Option<Self::Item> {
519 if let Some(first) = self.stored_events.pop_front() {
520 return Some(first);
521 }
522
523 let event = self.inner.next()?;
524
525 Some(match event {
526 Event::Start(Tag::Table(t)) => {
527 self.stored_events.push_back(Event::Start(Tag::Table(t)));
528 Event::Html(CowStr::Borrowed("<div>"))
529 }
530 Event::End(TagEnd::Table) => {
531 self.stored_events.push_back(Event::Html(CowStr::Borrowed("</div>")));
532 Event::End(TagEnd::Table)
533 }
534 e => e,
535 })
536 }
537}
538
/// A Markdown event paired with a `usize` range, as yielded by the parser's offset iterator.
type SpannedEvent<'a> = (Event<'a>, Range<usize>);
540
541struct HeadingLinks<'a, 'b, 'ids, I> {
543 inner: I,
544 toc: Option<&'b mut TocBuilder>,
545 buf: VecDeque<SpannedEvent<'a>>,
546 id_map: &'ids mut IdMap,
547 heading_offset: HeadingOffset,
548}
549
550impl<'b, 'ids, I> HeadingLinks<'_, 'b, 'ids, I> {
551 fn new(
552 iter: I,
553 toc: Option<&'b mut TocBuilder>,
554 ids: &'ids mut IdMap,
555 heading_offset: HeadingOffset,
556 ) -> Self {
557 HeadingLinks { inner: iter, toc, buf: VecDeque::new(), id_map: ids, heading_offset }
558 }
559}
560
561impl<'a, I: Iterator<Item = SpannedEvent<'a>>> Iterator for HeadingLinks<'a, '_, '_, I> {
562 type Item = SpannedEvent<'a>;
563
564 fn next(&mut self) -> Option<Self::Item> {
565 if let Some(e) = self.buf.pop_front() {
566 return Some(e);
567 }
568
569 let event = self.inner.next();
570 if let Some((Event::Start(Tag::Heading { level, .. }), _)) = event {
571 let mut id = String::new();
572 for event in &mut self.inner {
573 match &event.0 {
574 Event::End(TagEnd::Heading(_)) => break,
575 Event::Text(text) | Event::Code(text) => {
576 id.extend(text.chars().filter_map(slugify));
577 self.buf.push_back(event);
578 }
579 _ => self.buf.push_back(event),
580 }
581 }
582 let id = self.id_map.derive(id);
583
584 if let Some(ref mut builder) = self.toc {
585 let mut text_header = String::new();
586 plain_text_from_events(self.buf.iter().map(|(ev, _)| ev.clone()), &mut text_header);
587 let mut html_header = String::new();
588 html_text_from_events(self.buf.iter().map(|(ev, _)| ev.clone()), &mut html_header);
589 let sec = builder.push(level as u32, text_header, html_header, id.clone());
590 self.buf.push_front((Event::Html(format!("{sec} ").into()), 0..0));
591 }
592
593 let level =
594 std::cmp::min(level as u32 + (self.heading_offset as u32), MAX_HEADER_LEVEL);
595 self.buf.push_back((Event::Html(format!("</h{level}>").into()), 0..0));
596
597 let start_tags =
598 format!("<h{level} id=\"{id}\"><a class=\"doc-anchor\" href=\"#{id}\">§</a>");
599 return Some((Event::Html(start_tags.into()), 0..0));
600 }
601 event
602 }
603}
604
605struct SummaryLine<'a, I: Iterator<Item = Event<'a>>> {
607 inner: I,
608 started: bool,
609 depth: u32,
610 skipped_tags: u32,
611}
612
613impl<'a, I: Iterator<Item = Event<'a>>> SummaryLine<'a, I> {
614 fn new(iter: I) -> Self {
615 SummaryLine { inner: iter, started: false, depth: 0, skipped_tags: 0 }
616 }
617}
618
619fn check_if_allowed_tag(t: &TagEnd) -> bool {
620 matches!(
621 t,
622 TagEnd::Paragraph
623 | TagEnd::Emphasis
624 | TagEnd::Strong
625 | TagEnd::Strikethrough
626 | TagEnd::Link
627 | TagEnd::BlockQuote
628 )
629}
630
631fn is_forbidden_tag(t: &TagEnd) -> bool {
632 matches!(
633 t,
634 TagEnd::CodeBlock
635 | TagEnd::Table
636 | TagEnd::TableHead
637 | TagEnd::TableRow
638 | TagEnd::TableCell
639 | TagEnd::FootnoteDefinition
640 )
641}
642
643impl<'a, I: Iterator<Item = Event<'a>>> Iterator for SummaryLine<'a, I> {
644 type Item = Event<'a>;
645
646 fn next(&mut self) -> Option<Self::Item> {
647 if self.started && self.depth == 0 {
648 return None;
649 }
650 if !self.started {
651 self.started = true;
652 }
653 if let Some(event) = self.inner.next() {
654 let mut is_start = true;
655 let is_allowed_tag = match event {
656 Event::Start(ref c) => {
657 if is_forbidden_tag(&c.to_end()) {
658 self.skipped_tags += 1;
659 return None;
660 }
661 self.depth += 1;
662 check_if_allowed_tag(&c.to_end())
663 }
664 Event::End(ref c) => {
665 if is_forbidden_tag(c) {
666 self.skipped_tags += 1;
667 return None;
668 }
669 self.depth -= 1;
670 is_start = false;
671 check_if_allowed_tag(c)
672 }
673 Event::FootnoteReference(_) => {
674 self.skipped_tags += 1;
675 false
676 }
677 _ => true,
678 };
679 if !is_allowed_tag {
680 self.skipped_tags += 1;
681 }
682 return if !is_allowed_tag {
683 if is_start {
684 Some(Event::Start(Tag::Paragraph))
685 } else {
686 Some(Event::End(TagEnd::Paragraph))
687 }
688 } else {
689 Some(event)
690 };
691 }
692 None
693 }
694}
695
/// A line offset attached to a code block found in a Markdown document.
pub(crate) struct MdRelLine {
    /// The stored line offset.
    offset: usize,
}

impl MdRelLine {
    /// Wraps `offset`.
    pub(crate) const fn new(offset: usize) -> Self {
        MdRelLine { offset }
    }

    /// Returns the wrapped offset.
    pub(crate) const fn offset(self) -> usize {
        let MdRelLine { offset } = self;
        offset
    }
}
717
718pub(crate) fn find_testable_code<T: doctest::DocTestVisitor>(
719 doc: &str,
720 tests: &mut T,
721 error_codes: ErrorCodes,
722 extra_info: Option<&ExtraInfo<'_>>,
723) {
724 find_codes(doc, tests, error_codes, extra_info, false)
725}
726
727pub(crate) fn find_codes<T: doctest::DocTestVisitor>(
728 doc: &str,
729 tests: &mut T,
730 error_codes: ErrorCodes,
731 extra_info: Option<&ExtraInfo<'_>>,
732 include_non_rust: bool,
733) {
734 let mut parser = Parser::new_ext(doc, main_body_opts()).into_offset_iter();
735 let mut prev_offset = 0;
736 let mut nb_lines = 0;
737 let mut register_header = None;
738 while let Some((event, offset)) = parser.next() {
739 match event {
740 Event::Start(Tag::CodeBlock(kind)) => {
741 let block_info = match kind {
742 CodeBlockKind::Fenced(ref lang) => {
743 if lang.is_empty() {
744 Default::default()
745 } else {
746 LangString::parse(lang, error_codes, extra_info)
747 }
748 }
749 CodeBlockKind::Indented => Default::default(),
750 };
751 if !include_non_rust && !block_info.rust {
752 continue;
753 }
754
755 let mut test_s = String::new();
756
757 while let Some((Event::Text(s), _)) = parser.next() {
758 test_s.push_str(&s);
759 }
760 let text = test_s
761 .lines()
762 .map(|l| map_line(l).for_code())
763 .collect::<Vec<Cow<'_, str>>>()
764 .join("\n");
765
766 nb_lines += doc[prev_offset..offset.start].lines().count();
767 if nb_lines != 0 && !&doc[prev_offset..offset.start].ends_with('\n') {
771 nb_lines -= 1;
772 }
773 let line = MdRelLine::new(nb_lines);
774 tests.visit_test(text, block_info, line);
775 prev_offset = offset.start;
776 }
777 Event::Start(Tag::Heading { level, .. }) => {
778 register_header = Some(level as u32);
779 }
780 Event::Text(ref s) if register_header.is_some() => {
781 let level = register_header.unwrap();
782 tests.visit_header(s, level);
783 register_header = None;
784 }
785 _ => {}
786 }
787 }
788}
789
790pub(crate) struct ExtraInfo<'tcx> {
791 def_id: LocalDefId,
792 sp: Span,
793 tcx: TyCtxt<'tcx>,
794}
795
796impl<'tcx> ExtraInfo<'tcx> {
797 pub(crate) fn new(tcx: TyCtxt<'tcx>, def_id: LocalDefId, sp: Span) -> ExtraInfo<'tcx> {
798 ExtraInfo { def_id, sp, tcx }
799 }
800
801 fn error_invalid_codeblock_attr(&self, msg: impl Into<DiagMessage>) {
802 self.tcx.node_span_lint(
803 crate::lint::INVALID_CODEBLOCK_ATTRIBUTES,
804 self.tcx.local_def_id_to_hir_id(self.def_id),
805 self.sp,
806 |lint| {
807 lint.primary_message(msg);
808 },
809 );
810 }
811
812 fn error_invalid_codeblock_attr_with_help(
813 &self,
814 msg: impl Into<DiagMessage>,
815 f: impl for<'a, 'b> FnOnce(&'b mut Diag<'a, ()>),
816 ) {
817 self.tcx.node_span_lint(
818 crate::lint::INVALID_CODEBLOCK_ATTRIBUTES,
819 self.tcx.local_def_id_to_hir_id(self.def_id),
820 self.sp,
821 |lint| {
822 lint.primary_message(msg);
823 f(lint);
824 },
825 );
826 }
827}
828
/// Parsed form of a code block's language string (the text after the opening fence).
#[derive(Eq, PartialEq, Clone, Debug)]
pub(crate) struct LangString {
    /// The unparsed language string.
    pub(crate) original: String,
    /// Set by the `should_panic` token.
    pub(crate) should_panic: bool,
    /// Set by the `no_run` token, and also by `compile_fail`.
    pub(crate) no_run: bool,
    /// Result of the `ignore` / `ignore-*` tokens.
    pub(crate) ignore: Ignore,
    /// Whether the block is considered Rust code.
    pub(crate) rust: bool,
    /// Set by the `test_harness` token.
    pub(crate) test_harness: bool,
    /// Set by the `compile_fail` token.
    pub(crate) compile_fail: bool,
    /// Set by the `standalone_crate` token.
    pub(crate) standalone_crate: bool,
    /// `E####` tokens, collected only when error-code checking is enabled.
    pub(crate) error_codes: Vec<String>,
    /// Edition named by an `edition*` token, if it parsed.
    pub(crate) edition: Option<Edition>,
    /// Classes given via `.class` or `class=value` inside an attribute block.
    pub(crate) added_classes: Vec<String>,
    /// Tokens that were not recognized.
    pub(crate) unknown: Vec<String>,
}
844
/// How a code block is ignored.
#[derive(Eq, PartialEq, Clone, Debug)]
pub(crate) enum Ignore {
    /// A bare `ignore` token was present.
    All,
    /// No ignore token was present.
    None,
    /// One or more `ignore-<x>` tokens were present; holds each `<x>`.
    Some(Vec<String>),
}
851
/// Tokenizer over a code-block language string, yielding [`LangStringToken`]s.
pub(crate) struct TagIterator<'a, 'tcx> {
    /// Character cursor over `data`.
    inner: Peekable<CharIndices<'a>>,
    /// The full language string being tokenized; tokens borrow from it.
    data: &'a str,
    /// `true` between an opening `{` and its closing `}`.
    is_in_attribute_block: bool,
    /// Where to report lints; `None` silences reporting.
    extra: Option<&'a ExtraInfo<'tcx>>,
    /// Set once any error is emitted; the iterator yields nothing afterwards.
    is_error: bool,
}
898
/// A token produced by [`TagIterator`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) enum LangStringToken<'a> {
    /// A bare or quoted word outside an attribute block.
    LangToken(&'a str),
    /// A `.class` entry inside an attribute block (without the leading dot).
    ClassAttribute(&'a str),
    /// A `key=value` entry inside an attribute block.
    KeyValueAttribute(&'a str, &'a str),
}
905
/// Returns `true` if `c` may start a bare token: `_`, `-`, `:` or an ASCII letter or digit.
fn is_leading_char(c: char) -> bool {
    matches!(c, '_' | '-' | ':') || c.is_ascii_alphanumeric()
}
909fn is_bareword_char(c: char) -> bool {
910 is_leading_char(c) || ".!#$%&*+/;<>?@^|~".contains(c)
911}
/// Returns `true` if `c` separates tokens: a space, a comma or a tab.
fn is_separator(c: char) -> bool {
    matches!(c, ' ' | ',' | '\t')
}
915
/// Half-open byte range (`start..end`) into the language string being tokenized.
struct Indices {
    start: usize,
    end: usize,
}
920
impl<'a, 'tcx> TagIterator<'a, 'tcx> {
    /// Creates a tokenizer over `data`, starting outside any attribute block.
    pub(crate) fn new(data: &'a str, extra: Option<&'a ExtraInfo<'tcx>>) -> Self {
        Self {
            inner: data.char_indices().peekable(),
            data,
            is_in_attribute_block: false,
            extra,
            is_error: false,
        }
    }

    /// Reports `err` (when lint context is available) and marks the tokenizer as failed.
    fn emit_error(&mut self, err: impl Into<DiagMessage>) {
        if let Some(extra) = self.extra {
            extra.error_invalid_codeblock_attr(err);
        }
        self.is_error = true;
    }

    /// Advances past separators; returns the position of the next non-separator character
    /// (without consuming it), or `None` at end of input.
    fn skip_separators(&mut self) -> Option<usize> {
        while let Some((pos, c)) = self.inner.peek() {
            if !is_separator(*c) {
                return Some(*pos);
            }
            self.inner.next();
        }
        None
    }

    /// Consumes up to and including the closing `"`; `start` is the position of the opening
    /// quote. Returns the range between the quotes, or emits an error if none is found.
    fn parse_string(&mut self, start: usize) -> Option<Indices> {
        for (pos, c) in self.inner.by_ref() {
            if c == '"' {
                return Some(Indices { start: start + 1, end: pos });
            }
        }
        self.emit_error("unclosed quote string `\"`");
        None
    }

    /// Parses a `.class` entry; `start` is the position of the already-consumed `.`.
    fn parse_class(&mut self, start: usize) -> Option<LangStringToken<'a>> {
        while let Some((pos, c)) = self.inner.peek().copied() {
            if is_bareword_char(c) {
                self.inner.next();
            } else {
                let class = &self.data[start + 1..pos];
                if class.is_empty() {
                    self.emit_error(format!("unexpected `{c}` character after `.`"));
                    return None;
                } else if self.check_after_token() {
                    return Some(LangStringToken::ClassAttribute(class));
                } else {
                    return None;
                }
            }
        }
        // Reached end of input while still inside the class name.
        let class = &self.data[start + 1..];
        if class.is_empty() {
            self.emit_error("missing character after `.`");
            None
        } else if self.check_after_token() {
            Some(LangStringToken::ClassAttribute(class))
        } else {
            None
        }
    }

    /// Consumes bareword characters and returns the range from `start` to the first
    /// non-bareword character (left unconsumed). Hitting end of input is an error.
    fn parse_token(&mut self, start: usize) -> Option<Indices> {
        while let Some((pos, c)) = self.inner.peek() {
            if !is_bareword_char(*c) {
                return Some(Indices { start, end: *pos });
            }
            self.inner.next();
        }
        self.emit_error("unexpected end");
        None
    }

    /// Parses a `key=value` entry whose first character `c` (at `start`) has already been
    /// consumed. Key and value may each be a bare token or a quoted string; neither may be
    /// empty.
    fn parse_key_value(&mut self, c: char, start: usize) -> Option<LangStringToken<'a>> {
        let key_indices =
            if c == '"' { self.parse_string(start)? } else { self.parse_token(start)? };
        if key_indices.start == key_indices.end {
            self.emit_error("unexpected empty string as key");
            return None;
        }

        if let Some((_, c)) = self.inner.next() {
            if c != '=' {
                self.emit_error(format!("expected `=`, found `{c}`"));
                return None;
            }
        } else {
            self.emit_error("unexpected end");
            return None;
        }
        let value_indices = match self.inner.next() {
            Some((pos, '"')) => self.parse_string(pos)?,
            Some((pos, c)) if is_bareword_char(c) => self.parse_token(pos)?,
            Some((_, c)) => {
                self.emit_error(format!("unexpected `{c}` character after `=`"));
                return None;
            }
            None => {
                self.emit_error("expected value after `=`");
                return None;
            }
        };
        if value_indices.start == value_indices.end {
            self.emit_error("unexpected empty string as value");
            None
        } else if self.check_after_token() {
            Some(LangStringToken::KeyValueAttribute(
                &self.data[key_indices.start..key_indices.end],
                &self.data[value_indices.start..value_indices.end],
            ))
        } else {
            None
        }
    }

    /// Checks (without consuming) that a token inside an attribute block is followed by
    /// `}`, a separator, `(` or end of input; emits an error otherwise.
    fn check_after_token(&mut self) -> bool {
        if let Some((_, c)) = self.inner.peek().copied() {
            if c == '}' || is_separator(c) || c == '(' {
                true
            } else {
                self.emit_error(format!("unexpected `{c}` character"));
                false
            }
        } else {
            true
        }
    }

    /// Parses the next entry inside an attribute block: `}` closes the block and resumes
    /// normal tokenizing, `.` starts a class, and `"` or a leading character starts a
    /// key/value pair.
    fn parse_in_attribute_block(&mut self) -> Option<LangStringToken<'a>> {
        if let Some((pos, c)) = self.inner.next() {
            if c == '}' {
                self.is_in_attribute_block = false;
                return self.next();
            } else if c == '.' {
                return self.parse_class(pos);
            } else if c == '"' || is_leading_char(c) {
                return self.parse_key_value(c, pos);
            } else {
                self.emit_error(format!("unexpected character `{c}`"));
                return None;
            }
        }
        self.emit_error("unclosed attribute block (`{}`): missing `}` at the end");
        None
    }

    /// Skips a parenthesized comment: consumes up to and including the next `)`. Returns
    /// `false` (after emitting an error) if the input ends first.
    fn skip_paren_block(&mut self) -> bool {
        for (_, c) in self.inner.by_ref() {
            if c == ')' {
                return true;
            }
        }
        self.emit_error("unclosed comment: missing `)` at the end");
        false
    }

    /// Parses the next token outside an attribute block, starting at byte position `start`.
    fn parse_outside_attribute_block(&mut self, start: usize) -> Option<LangStringToken<'a>> {
        while let Some((pos, c)) = self.inner.next() {
            if c == '"' {
                // A quoted token must begin at the token start.
                if pos != start {
                    self.emit_error("expected ` `, `{` or `,` found `\"`");
                    return None;
                }
                let indices = self.parse_string(pos)?;
                if let Some((_, c)) = self.inner.peek().copied()
                    && c != '{'
                    && !is_separator(c)
                    && c != '('
                {
                    self.emit_error(format!("expected ` `, `{{` or `,` after `\"`, found `{c}`"));
                    return None;
                }
                return Some(LangStringToken::LangToken(&self.data[indices.start..indices.end]));
            } else if c == '{' {
                self.is_in_attribute_block = true;
                return self.next();
            } else if is_separator(c) {
                if pos != start {
                    return Some(LangStringToken::LangToken(&self.data[start..pos]));
                }
                return self.next();
            } else if c == '(' {
                if !self.skip_paren_block() {
                    return None;
                }
                // A comment directly after a word ends that word.
                if pos != start {
                    return Some(LangStringToken::LangToken(&self.data[start..pos]));
                }
                return self.next();
            } else if (pos == start && is_leading_char(c)) || (pos != start && is_bareword_char(c))
            {
                continue;
            } else {
                self.emit_error(format!("unexpected character `{c}`"));
                return None;
            }
        }
        // End of input: whatever was accumulated since `start` is the final token.
        let token = &self.data[start..];
        if token.is_empty() { None } else { Some(LangStringToken::LangToken(&self.data[start..])) }
    }
}
1128
1129impl<'a> Iterator for TagIterator<'a, '_> {
1130 type Item = LangStringToken<'a>;
1131
1132 fn next(&mut self) -> Option<Self::Item> {
1133 if self.is_error {
1134 return None;
1135 }
1136 let Some(start) = self.skip_separators() else {
1137 if self.is_in_attribute_block {
1138 self.emit_error("unclosed attribute block (`{}`): missing `}` at the end");
1139 }
1140 return None;
1141 };
1142 if self.is_in_attribute_block {
1143 self.parse_in_attribute_block()
1144 } else {
1145 self.parse_outside_attribute_block(start)
1146 }
1147 }
1148}
1149
1150impl Default for LangString {
1151 fn default() -> Self {
1152 Self {
1153 original: String::new(),
1154 should_panic: false,
1155 no_run: false,
1156 ignore: Ignore::None,
1157 rust: true,
1158 test_harness: false,
1159 compile_fail: false,
1160 standalone_crate: false,
1161 error_codes: Vec::new(),
1162 edition: None,
1163 added_classes: Vec::new(),
1164 unknown: Vec::new(),
1165 }
1166 }
1167}
1168
1169impl LangString {
1170 fn parse_without_check(string: &str, allow_error_code_check: ErrorCodes) -> Self {
1171 Self::parse(string, allow_error_code_check, None)
1172 }
1173
1174 fn parse(
1175 string: &str,
1176 allow_error_code_check: ErrorCodes,
1177 extra: Option<&ExtraInfo<'_>>,
1178 ) -> Self {
1179 let allow_error_code_check = allow_error_code_check.as_bool();
1180 let mut seen_rust_tags = false;
1181 let mut seen_other_tags = false;
1182 let mut seen_custom_tag = false;
1183 let mut data = LangString::default();
1184 let mut ignores = vec![];
1185
1186 data.original = string.to_owned();
1187
1188 let mut call = |tokens: &mut dyn Iterator<Item = LangStringToken<'_>>| {
1189 for token in tokens {
1190 match token {
1191 LangStringToken::LangToken("should_panic") => {
1192 data.should_panic = true;
1193 seen_rust_tags = !seen_other_tags;
1194 }
1195 LangStringToken::LangToken("no_run") => {
1196 data.no_run = true;
1197 seen_rust_tags = !seen_other_tags;
1198 }
1199 LangStringToken::LangToken("ignore") => {
1200 data.ignore = Ignore::All;
1201 seen_rust_tags = !seen_other_tags;
1202 }
1203 LangStringToken::LangToken(x)
1204 if let Some(ignore) = x.strip_prefix("ignore-") =>
1205 {
1206 ignores.push(ignore.to_owned());
1207 seen_rust_tags = !seen_other_tags;
1208 }
1209 LangStringToken::LangToken("rust") => {
1210 data.rust = true;
1211 seen_rust_tags = true;
1212 }
1213 LangStringToken::LangToken("custom") => {
1214 seen_custom_tag = true;
1215 }
1216 LangStringToken::LangToken("test_harness") => {
1217 data.test_harness = true;
1218 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1219 }
1220 LangStringToken::LangToken("compile_fail") => {
1221 data.compile_fail = true;
1222 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1223 data.no_run = true;
1224 }
1225 LangStringToken::LangToken("standalone_crate") => {
1226 data.standalone_crate = true;
1227 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1228 }
1229 LangStringToken::LangToken(x)
1230 if let Some(edition) = x.strip_prefix("edition") =>
1231 {
1232 data.edition = edition.parse::<Edition>().ok();
1233 }
1234 LangStringToken::LangToken(x)
1235 if let Some(edition) = x.strip_prefix("rust")
1236 && edition.parse::<Edition>().is_ok()
1237 && let Some(extra) = extra =>
1238 {
1239 extra.error_invalid_codeblock_attr_with_help(
1240 format!("unknown attribute `{x}`"),
1241 |lint| {
1242 lint.help(format!(
1243 "there is an attribute with a similar name: `edition{edition}`"
1244 ));
1245 },
1246 );
1247 }
1248 LangStringToken::LangToken(x)
1249 if allow_error_code_check
1250 && let Some(error_code) = x.strip_prefix('E')
1251 && error_code.len() == 4 =>
1252 {
1253 if error_code.parse::<u32>().is_ok() {
1254 data.error_codes.push(x.to_owned());
1255 seen_rust_tags = !seen_other_tags || seen_rust_tags;
1256 } else {
1257 seen_other_tags = true;
1258 }
1259 }
1260 LangStringToken::LangToken(x) if let Some(extra) = extra => {
1261 if let Some(help) = match x.to_lowercase().as_str() {
1262 "compile-fail" | "compile_fail" | "compilefail" => Some(
1263 "use `compile_fail` to invert the results of this test, so that it \
1264 passes if it cannot be compiled and fails if it can",
1265 ),
1266 "should-panic" | "should_panic" | "shouldpanic" => Some(
1267 "use `should_panic` to invert the results of this test, so that if \
1268 passes if it panics and fails if it does not",
1269 ),
1270 "no-run" | "no_run" | "norun" => Some(
1271 "use `no_run` to compile, but not run, the code sample during \
1272 testing",
1273 ),
1274 "test-harness" | "test_harness" | "testharness" => Some(
1275 "use `test_harness` to run functions marked `#[test]` instead of a \
1276 potentially-implicit `main` function",
1277 ),
1278 "standalone" | "standalone_crate" | "standalone-crate"
1279 if extra.sp.at_least_rust_2024() =>
1280 {
1281 Some(
1282 "use `standalone_crate` to compile this code block \
1283 separately",
1284 )
1285 }
1286 _ => None,
1287 } {
1288 extra.error_invalid_codeblock_attr_with_help(
1289 format!("unknown attribute `{x}`"),
1290 |lint| {
1291 lint.help(help).help(
1292 "this code block may be skipped during testing, \
1293 because unknown attributes are treated as markers for \
1294 code samples written in other programming languages, \
1295 unless it is also explicitly marked as `rust`",
1296 );
1297 },
1298 );
1299 }
1300 seen_other_tags = true;
1301 data.unknown.push(x.to_owned());
1302 }
1303 LangStringToken::LangToken(x) => {
1304 seen_other_tags = true;
1305 data.unknown.push(x.to_owned());
1306 }
1307 LangStringToken::KeyValueAttribute("class", value) => {
1308 data.added_classes.push(value.to_owned());
1309 }
1310 LangStringToken::KeyValueAttribute(key, ..) if let Some(extra) = extra => {
1311 extra
1312 .error_invalid_codeblock_attr(format!("unsupported attribute `{key}`"));
1313 }
1314 LangStringToken::ClassAttribute(class) => {
1315 data.added_classes.push(class.to_owned());
1316 }
1317 _ => {}
1318 }
1319 }
1320 };
1321
1322 let mut tag_iter = TagIterator::new(string, extra);
1323 call(&mut tag_iter);
1324
1325 if !ignores.is_empty() {
1327 data.ignore = Ignore::Some(ignores);
1328 }
1329
1330 data.rust &= !seen_custom_tag && (!seen_other_tags || seen_rust_tags) && !tag_iter.is_error;
1331
1332 data
1333 }
1334}
1335
1336impl<'a> Markdown<'a> {
1337 pub fn write_into(self, f: impl fmt::Write) -> fmt::Result {
1338 if self.content.is_empty() {
1340 return Ok(());
1341 }
1342
1343 html::write_html_fmt(f, self.into_iter())
1344 }
1345
1346 fn into_iter(self) -> CodeBlocks<'a, 'a, impl Iterator<Item = Event<'a>>> {
1347 let Markdown {
1348 content: md,
1349 links,
1350 ids,
1351 error_codes: codes,
1352 edition,
1353 playground,
1354 heading_offset,
1355 } = self;
1356
1357 let replacer = move |broken_link: BrokenLink<'_>| {
1358 links
1359 .iter()
1360 .find(|link| *link.original_text == *broken_link.reference)
1361 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1362 };
1363
1364 let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(replacer));
1365 let p = p.into_offset_iter();
1366
1367 ids.handle_footnotes(|ids, existing_footnotes| {
1368 let p = HeadingLinks::new(p, None, ids, heading_offset);
1369 let p = SpannedLinkReplacer::new(p, links);
1370 let p = footnotes::Footnotes::new(p, existing_footnotes);
1371 let p = TableWrapper::new(p.map(|(ev, _)| ev));
1372 CodeBlocks::new(p, codes, edition, playground)
1373 })
1374 }
1375
1376 pub(crate) fn split_summary_and_content(self) -> (Option<String>, Option<String>) {
1382 if self.content.is_empty() {
1383 return (None, None);
1384 }
1385 let mut p = self.into_iter();
1386
1387 let mut event_level = 0;
1388 let mut summary_events = Vec::new();
1389 let mut get_next_tag = false;
1390
1391 let mut end_of_summary = false;
1392 while let Some(event) = p.next() {
1393 match event {
1394 Event::Start(_) => event_level += 1,
1395 Event::End(kind) => {
1396 event_level -= 1;
1397 if event_level == 0 {
1398 end_of_summary = true;
1400 get_next_tag = kind == TagEnd::Table;
1402 }
1403 }
1404 _ => {}
1405 }
1406 summary_events.push(event);
1407 if end_of_summary {
1408 if get_next_tag && let Some(event) = p.next() {
1409 summary_events.push(event);
1410 }
1411 break;
1412 }
1413 }
1414 let mut summary = String::new();
1415 html::push_html(&mut summary, summary_events.into_iter());
1416 if summary.is_empty() {
1417 return (None, None);
1418 }
1419 let mut content = String::new();
1420 html::push_html(&mut content, p);
1421
1422 if content.is_empty() { (Some(summary), None) } else { (Some(summary), Some(content)) }
1423 }
1424}
1425
1426impl MarkdownWithToc<'_> {
1427 pub(crate) fn into_parts(self) -> (Toc, String) {
1428 let MarkdownWithToc { content: md, links, ids, error_codes: codes, edition, playground } =
1429 self;
1430
1431 if md.is_empty() {
1433 return (Toc { entries: Vec::new() }, String::new());
1434 }
1435 let mut replacer = |broken_link: BrokenLink<'_>| {
1436 links
1437 .iter()
1438 .find(|link| *link.original_text == *broken_link.reference)
1439 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1440 };
1441
1442 let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(&mut replacer));
1443 let p = p.into_offset_iter();
1444
1445 let mut s = String::with_capacity(md.len() * 3 / 2);
1446
1447 let mut toc = TocBuilder::new();
1448
1449 ids.handle_footnotes(|ids, existing_footnotes| {
1450 let p = HeadingLinks::new(p, Some(&mut toc), ids, HeadingOffset::H1);
1451 let p = footnotes::Footnotes::new(p, existing_footnotes);
1452 let p = TableWrapper::new(p.map(|(ev, _)| ev));
1453 let p = CodeBlocks::new(p, codes, edition, playground);
1454 html::push_html(&mut s, p);
1455 });
1456
1457 (toc.into_toc(), s)
1458 }
1459
1460 pub(crate) fn write_into(self, mut f: impl fmt::Write) -> fmt::Result {
1461 let (toc, s) = self.into_parts();
1462 write!(f, "<nav id=\"rustdoc\">{toc}</nav>{s}", toc = toc.print())
1463 }
1464}
1465
1466impl<'a> MarkdownItemInfo<'a> {
1467 pub(crate) fn new(content: &'a str, links: &'a [RenderedLink], ids: &'a mut IdMap) -> Self {
1468 Self { content, links, ids }
1469 }
1470
1471 pub(crate) fn write_into(self, mut f: impl fmt::Write) -> fmt::Result {
1472 let MarkdownItemInfo { content: md, links, ids } = self;
1473
1474 if md.is_empty() {
1476 return Ok(());
1477 }
1478
1479 let replacer = move |broken_link: BrokenLink<'_>| {
1480 links
1481 .iter()
1482 .find(|link| *link.original_text == *broken_link.reference)
1483 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1484 };
1485
1486 let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(replacer));
1487 let p = p.into_offset_iter();
1488
1489 let p = p.map(|event| match event.0 {
1491 Event::Html(text) | Event::InlineHtml(text) => (Event::Text(text), event.1),
1492 _ => event,
1493 });
1494
1495 ids.handle_footnotes(|ids, existing_footnotes| {
1496 let p = HeadingLinks::new(p, None, ids, HeadingOffset::H1);
1497 let p = SpannedLinkReplacer::new(p, links);
1498 let p = footnotes::Footnotes::new(p, existing_footnotes);
1499 let p = TableWrapper::new(p.map(|(ev, _)| ev));
1500 let p = p.filter(|event| {
1501 !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
1502 });
1503 html::write_html_fmt(&mut f, p)
1504 })
1505 }
1506}
1507
1508impl MarkdownSummaryLine<'_> {
1509 pub(crate) fn into_string_with_has_more_content(self) -> (String, bool) {
1510 let MarkdownSummaryLine(md, links) = self;
1511 if md.is_empty() {
1513 return (String::new(), false);
1514 }
1515
1516 let mut replacer = |broken_link: BrokenLink<'_>| {
1517 links
1518 .iter()
1519 .find(|link| *link.original_text == *broken_link.reference)
1520 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1521 };
1522
1523 let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer))
1524 .peekable();
1525 let mut summary = SummaryLine::new(p);
1526
1527 let mut s = String::new();
1528
1529 let without_paragraphs = LinkReplacer::new(&mut summary, links).filter(|event| {
1530 !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
1531 });
1532
1533 html::push_html(&mut s, without_paragraphs);
1534
1535 let has_more_content =
1536 matches!(summary.inner.peek(), Some(Event::Start(_))) || summary.skipped_tags > 0;
1537
1538 (s, has_more_content)
1539 }
1540
1541 pub(crate) fn into_string(self) -> String {
1542 self.into_string_with_has_more_content().0
1543 }
1544}
1545
1546fn markdown_summary_with_limit(
1555 md: &str,
1556 link_names: &[RenderedLink],
1557 length_limit: usize,
1558) -> (String, bool) {
1559 if md.is_empty() {
1560 return (String::new(), false);
1561 }
1562
1563 let mut replacer = |broken_link: BrokenLink<'_>| {
1564 link_names
1565 .iter()
1566 .find(|link| *link.original_text == *broken_link.reference)
1567 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1568 };
1569
1570 let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer));
1571 let mut p = LinkReplacer::new(p, link_names);
1572
1573 let mut buf = HtmlWithLimit::new(length_limit);
1574 let mut stopped_early = false;
1575 let _ = p.try_for_each(|event| {
1576 match &event {
1577 Event::Text(text) => {
1578 let r =
1579 text.split_inclusive(char::is_whitespace).try_for_each(|word| buf.push(word));
1580 if r.is_break() {
1581 stopped_early = true;
1582 }
1583 return r;
1584 }
1585 Event::Code(code) => {
1586 buf.open_tag("code");
1587 let r = buf.push(code);
1588 if r.is_break() {
1589 stopped_early = true;
1590 } else {
1591 buf.close_tag();
1592 }
1593 return r;
1594 }
1595 Event::Start(tag) => match tag {
1596 Tag::Emphasis => buf.open_tag("em"),
1597 Tag::Strong => buf.open_tag("strong"),
1598 Tag::CodeBlock(..) => return ControlFlow::Break(()),
1599 _ => {}
1600 },
1601 Event::End(tag) => match tag {
1602 TagEnd::Emphasis | TagEnd::Strong => buf.close_tag(),
1603 TagEnd::Paragraph | TagEnd::Heading(_) => return ControlFlow::Break(()),
1604 _ => {}
1605 },
1606 Event::HardBreak | Event::SoftBreak => buf.push(" ")?,
1607 _ => {}
1608 };
1609 ControlFlow::Continue(())
1610 });
1611
1612 (buf.finish(), stopped_early)
1613}
1614
1615pub(crate) fn short_markdown_summary(markdown: &str, link_names: &[RenderedLink]) -> String {
1622 let (mut s, was_shortened) = markdown_summary_with_limit(markdown, link_names, 59);
1623
1624 if was_shortened {
1625 s.push('…');
1626 }
1627
1628 s
1629}
1630
1631pub(crate) fn plain_text_summary(md: &str, link_names: &[RenderedLink]) -> String {
1638 if md.is_empty() {
1639 return String::new();
1640 }
1641
1642 let mut s = String::with_capacity(md.len() * 3 / 2);
1643
1644 let mut replacer = |broken_link: BrokenLink<'_>| {
1645 link_names
1646 .iter()
1647 .find(|link| *link.original_text == *broken_link.reference)
1648 .map(|link| (link.href.as_str().into(), link.tooltip.as_str().into()))
1649 };
1650
1651 let p = Parser::new_with_broken_link_callback(md, summary_opts(), Some(&mut replacer));
1652
1653 plain_text_from_events(p, &mut s);
1654
1655 s
1656}
1657
1658pub(crate) fn plain_text_from_events<'a>(
1659 events: impl Iterator<Item = pulldown_cmark::Event<'a>>,
1660 s: &mut String,
1661) {
1662 for event in events {
1663 match &event {
1664 Event::Text(text) => s.push_str(text),
1665 Event::Code(code) => {
1666 s.push('`');
1667 s.push_str(code);
1668 s.push('`');
1669 }
1670 Event::HardBreak | Event::SoftBreak => s.push(' '),
1671 Event::Start(Tag::CodeBlock(..)) => break,
1672 Event::End(TagEnd::Paragraph) => break,
1673 Event::End(TagEnd::Heading(..)) => break,
1674 _ => (),
1675 }
1676 }
1677}
1678
1679pub(crate) fn html_text_from_events<'a>(
1680 events: impl Iterator<Item = pulldown_cmark::Event<'a>>,
1681 s: &mut String,
1682) {
1683 for event in events {
1684 match &event {
1685 Event::Text(text) => {
1686 write!(s, "{}", EscapeBodyText(text)).expect("string alloc infallible")
1687 }
1688 Event::Code(code) => {
1689 s.push_str("<code>");
1690 write!(s, "{}", EscapeBodyText(code)).expect("string alloc infallible");
1691 s.push_str("</code>");
1692 }
1693 Event::HardBreak | Event::SoftBreak => s.push(' '),
1694 Event::Start(Tag::CodeBlock(..)) => break,
1695 Event::End(TagEnd::Paragraph) => break,
1696 Event::End(TagEnd::Heading(..)) => break,
1697 _ => (),
1698 }
1699 }
1700}
1701
/// A link extracted from a Markdown document by `markdown_links`.
#[derive(Debug)]
pub(crate) struct MarkdownLink {
    /// The pulldown-cmark link type reported for this link.
    pub kind: LinkType,
    /// The link destination as a string.
    pub link: String,
    /// Where the link is located in the Markdown source, as byte offsets.
    pub range: MarkdownLinkRange,
}
1708
/// Byte range of a link within the Markdown source.
#[derive(Clone, Debug)]
pub(crate) enum MarkdownLinkRange {
    /// The range covers only the link destination text.
    Destination(Range<usize>),
    /// The destination could not be pinpointed, so the range covers the whole link
    /// (or reference definition) instead.
    WholeLink(Range<usize>),
}
1718
1719impl MarkdownLinkRange {
1720 pub fn inner_range(&self) -> &Range<usize> {
1722 match self {
1723 MarkdownLinkRange::Destination(range) => range,
1724 MarkdownLinkRange::WholeLink(range) => range,
1725 }
1726 }
1727}
1728
1729pub(crate) fn markdown_links<'md, R>(
1730 md: &'md str,
1731 preprocess_link: impl Fn(MarkdownLink) -> Option<R>,
1732) -> Vec<R> {
1733 use itertools::Itertools;
1734 if md.is_empty() {
1735 return vec![];
1736 }
1737
1738 let locate = |s: &str, fallback: Range<usize>| unsafe {
1740 let s_start = s.as_ptr();
1741 let s_end = s_start.add(s.len());
1742 let md_start = md.as_ptr();
1743 let md_end = md_start.add(md.len());
1744 if md_start <= s_start && s_end <= md_end {
1745 let start = s_start.offset_from(md_start) as usize;
1746 let end = s_end.offset_from(md_start) as usize;
1747 MarkdownLinkRange::Destination(start..end)
1748 } else {
1749 MarkdownLinkRange::WholeLink(fallback)
1750 }
1751 };
1752
1753 let span_for_link = |link: &CowStr<'_>, span: Range<usize>| {
1754 match link {
1759 CowStr::Borrowed(s) => locate(s, span),
1764
1765 CowStr::Boxed(_) | CowStr::Inlined(_) => MarkdownLinkRange::WholeLink(span),
1767 }
1768 };
1769
1770 let span_for_refdef = |link: &CowStr<'_>, span: Range<usize>| {
1771 let mut square_brace_count = 0;
1774 let mut iter = md.as_bytes()[span.start..span.end].iter().copied().enumerate();
1775 for (_i, c) in &mut iter {
1776 match c {
1777 b':' if square_brace_count == 0 => break,
1778 b'[' => square_brace_count += 1,
1779 b']' => square_brace_count -= 1,
1780 _ => {}
1781 }
1782 }
1783 while let Some((i, c)) = iter.next() {
1784 if c == b'<' {
1785 while let Some((j, c)) = iter.next() {
1786 match c {
1787 b'\\' => {
1788 let _ = iter.next();
1789 }
1790 b'>' => {
1791 return MarkdownLinkRange::Destination(
1792 i + 1 + span.start..j + span.start,
1793 );
1794 }
1795 _ => {}
1796 }
1797 }
1798 } else if !c.is_ascii_whitespace() {
1799 for (j, c) in iter.by_ref() {
1800 if c.is_ascii_whitespace() {
1801 return MarkdownLinkRange::Destination(i + span.start..j + span.start);
1802 }
1803 }
1804 return MarkdownLinkRange::Destination(i + span.start..span.end);
1805 }
1806 }
1807 span_for_link(link, span)
1808 };
1809
1810 let span_for_offset_backward = |span: Range<usize>, open: u8, close: u8| {
1811 let mut open_brace = !0;
1812 let mut close_brace = !0;
1813 for (i, b) in md.as_bytes()[span.clone()].iter().copied().enumerate().rev() {
1814 let i = i + span.start;
1815 if b == close {
1816 close_brace = i;
1817 break;
1818 }
1819 }
1820 if close_brace < span.start || close_brace >= span.end {
1821 return MarkdownLinkRange::WholeLink(span);
1822 }
1823 let mut nesting = 1;
1824 for (i, b) in md.as_bytes()[span.start..close_brace].iter().copied().enumerate().rev() {
1825 let i = i + span.start;
1826 if b == close {
1827 nesting += 1;
1828 }
1829 if b == open {
1830 nesting -= 1;
1831 }
1832 if nesting == 0 {
1833 open_brace = i;
1834 break;
1835 }
1836 }
1837 assert!(open_brace != close_brace);
1838 if open_brace < span.start || open_brace >= span.end {
1839 return MarkdownLinkRange::WholeLink(span);
1840 }
1841 let range = (open_brace + 1)..close_brace;
1843 MarkdownLinkRange::Destination(range)
1844 };
1845
1846 let span_for_offset_forward = |span: Range<usize>, open: u8, close: u8| {
1847 let mut open_brace = !0;
1848 let mut close_brace = !0;
1849 for (i, b) in md.as_bytes()[span.clone()].iter().copied().enumerate() {
1850 let i = i + span.start;
1851 if b == open {
1852 open_brace = i;
1853 break;
1854 }
1855 }
1856 if open_brace < span.start || open_brace >= span.end {
1857 return MarkdownLinkRange::WholeLink(span);
1858 }
1859 let mut nesting = 0;
1860 for (i, b) in md.as_bytes()[open_brace..span.end].iter().copied().enumerate() {
1861 let i = i + open_brace;
1862 if b == close {
1863 nesting -= 1;
1864 }
1865 if b == open {
1866 nesting += 1;
1867 }
1868 if nesting == 0 {
1869 close_brace = i;
1870 break;
1871 }
1872 }
1873 assert!(open_brace != close_brace);
1874 if open_brace < span.start || open_brace >= span.end {
1875 return MarkdownLinkRange::WholeLink(span);
1876 }
1877 let range = (open_brace + 1)..close_brace;
1879 MarkdownLinkRange::Destination(range)
1880 };
1881
1882 let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
1883 let event_iter = Parser::new_with_broken_link_callback(
1884 md,
1885 main_body_opts(),
1886 Some(&mut broken_link_callback),
1887 )
1888 .into_offset_iter();
1889 let mut links = Vec::new();
1890
1891 let mut refdefs = FxIndexMap::default();
1892 for (label, refdef) in event_iter.reference_definitions().iter().sorted_by_key(|x| x.0) {
1893 refdefs.insert(label.to_string(), (false, refdef.dest.to_string(), refdef.span.clone()));
1894 }
1895
1896 for (event, span) in event_iter {
1897 match event {
1898 Event::Start(Tag::Link { link_type, dest_url, id, .. })
1899 if may_be_doc_link(link_type) =>
1900 {
1901 let range = match link_type {
1902 LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => {
1904 span_for_offset_backward(span, b'[', b']')
1905 }
1906 LinkType::CollapsedUnknown => span_for_offset_forward(span, b'[', b']'),
1907 LinkType::Inline => span_for_offset_backward(span, b'(', b')'),
1908 LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => {
1910 if let Some((is_used, dest_url, span)) = refdefs.get_mut(&id[..]) {
1911 *is_used = true;
1912 span_for_refdef(&CowStr::from(&dest_url[..]), span.clone())
1913 } else {
1914 span_for_link(&dest_url, span)
1915 }
1916 }
1917 LinkType::Autolink | LinkType::Email => unreachable!(),
1918 };
1919
1920 if let Some(link) = preprocess_link(MarkdownLink {
1921 kind: link_type,
1922 link: dest_url.into_string(),
1923 range,
1924 }) {
1925 links.push(link);
1926 }
1927 }
1928 _ => {}
1929 }
1930 }
1931
1932 for (_label, (is_used, dest_url, span)) in refdefs.into_iter() {
1933 if !is_used
1934 && let Some(link) = preprocess_link(MarkdownLink {
1935 kind: LinkType::Reference,
1936 range: span_for_refdef(&CowStr::from(&dest_url[..]), span),
1937 link: dest_url,
1938 })
1939 {
1940 links.push(link);
1941 }
1942 }
1943
1944 links
1945}
1946
/// A Rust code block found in a Markdown document by `rust_code_blocks`.
#[derive(Debug)]
pub(crate) struct RustCodeBlock {
    /// Byte range of the whole code block in the Markdown source.
    pub(crate) range: Range<usize>,
    /// Byte range of the code itself in the Markdown source (empty when the block has
    /// no text).
    pub(crate) code: Range<usize>,
    /// `true` for a fenced code block, `false` for an indented one.
    pub(crate) is_fenced: bool,
    /// The parsed info string of the block; the default value for indented blocks and
    /// for fenced blocks without an info string.
    pub(crate) lang_string: LangString,
}
1957
1958pub(crate) fn rust_code_blocks(md: &str, extra_info: &ExtraInfo<'_>) -> Vec<RustCodeBlock> {
1961 let mut code_blocks = vec![];
1962
1963 if md.is_empty() {
1964 return code_blocks;
1965 }
1966
1967 let mut p = Parser::new_ext(md, main_body_opts()).into_offset_iter();
1968
1969 while let Some((event, offset)) = p.next() {
1970 if let Event::Start(Tag::CodeBlock(syntax)) = event {
1971 let (lang_string, code_start, code_end, range, is_fenced) = match syntax {
1972 CodeBlockKind::Fenced(syntax) => {
1973 let syntax = syntax.as_ref();
1974 let lang_string = if syntax.is_empty() {
1975 Default::default()
1976 } else {
1977 LangString::parse(syntax, ErrorCodes::Yes, Some(extra_info))
1978 };
1979 if !lang_string.rust {
1980 continue;
1981 }
1982 let (code_start, mut code_end) = match p.next() {
1983 Some((Event::Text(_), offset)) => (offset.start, offset.end),
1984 Some((_, sub_offset)) => {
1985 let code = Range { start: sub_offset.start, end: sub_offset.start };
1986 code_blocks.push(RustCodeBlock {
1987 is_fenced: true,
1988 range: offset,
1989 code,
1990 lang_string,
1991 });
1992 continue;
1993 }
1994 None => {
1995 let code = Range { start: offset.end, end: offset.end };
1996 code_blocks.push(RustCodeBlock {
1997 is_fenced: true,
1998 range: offset,
1999 code,
2000 lang_string,
2001 });
2002 continue;
2003 }
2004 };
2005 while let Some((Event::Text(_), offset)) = p.next() {
2006 code_end = offset.end;
2007 }
2008 (lang_string, code_start, code_end, offset, true)
2009 }
2010 CodeBlockKind::Indented => {
2011 if offset.end > offset.start && md.get(offset.end..=offset.end) == Some("\n") {
2014 (
2015 LangString::default(),
2016 offset.start,
2017 offset.end,
2018 Range { start: offset.start, end: offset.end - 1 },
2019 false,
2020 )
2021 } else {
2022 (LangString::default(), offset.start, offset.end, offset, false)
2023 }
2024 }
2025 };
2026
2027 code_blocks.push(RustCodeBlock {
2028 is_fenced,
2029 range,
2030 code: Range { start: code_start, end: code_end },
2031 lang_string,
2032 });
2033 }
2034 }
2035
2036 code_blocks
2037}
2038
/// Hands out HTML ids and keeps track of the ones already handed out.
#[derive(Clone, Default, Debug)]
pub struct IdMap {
    /// For each id seen so far, the numeric suffix to use the next time that id is
    /// requested again.
    map: FxHashMap<String, usize>,
    /// Shared counter passed (as a `Weak` handle) to the closure given to
    /// `handle_footnotes` — presumably the number of footnotes rendered so far; confirm
    /// against the `footnotes` module.
    existing_footnotes: Arc<AtomicUsize>,
}
2044
/// Returns `true` when `id` is one of the built-in ids listed below.
///
/// The comparison is exact and case-sensitive. `IdMap::derive` never hands out one of
/// these ids without a numeric suffix.
fn is_default_id(id: &str) -> bool {
    const DEFAULT_IDS: &[&str] = &[
        "help",
        "settings",
        "not-displayed",
        "alternative-display",
        "search",
        "crate-search",
        "crate-search-div",
        "themeStyle",
        "settings-menu",
        "help-button",
        "sidebar-button",
        "main-content",
        "toggle-all-docs",
        "all-types",
        "default-settings",
        "sidebar-vars",
        "copy-path",
        "rustdoc-toc",
        "rustdoc-modnav",
        "fields",
        "variants",
        "implementors-list",
        "synthetic-implementors-list",
        "foreign-impls",
        "implementations",
        "trait-implementations",
        "synthetic-implementations",
        "blanket-implementations",
        "required-associated-types",
        "provided-associated-types",
        "provided-associated-consts",
        "required-associated-consts",
        "required-methods",
        "provided-methods",
        "dyn-compatibility",
        "implementors",
        "synthetic-implementors",
        "implementations-list",
        "trait-implementations-list",
        "synthetic-implementations-list",
        "blanket-implementations-list",
        "deref-methods",
        "layout",
        "aliased-type",
    ];

    DEFAULT_IDS.contains(&id)
}
2099
2100impl IdMap {
2101 pub fn new() -> Self {
2102 IdMap { map: FxHashMap::default(), existing_footnotes: Arc::new(AtomicUsize::new(0)) }
2103 }
2104
2105 pub(crate) fn derive<S: AsRef<str> + ToString>(&mut self, candidate: S) -> String {
2106 let id = match self.map.get_mut(candidate.as_ref()) {
2107 None => {
2108 let candidate = candidate.to_string();
2109 if is_default_id(&candidate) {
2110 let id = format!("{}-{}", candidate, 1);
2111 self.map.insert(candidate, 2);
2112 id
2113 } else {
2114 candidate
2115 }
2116 }
2117 Some(a) => {
2118 let id = format!("{}-{}", candidate.as_ref(), *a);
2119 *a += 1;
2120 id
2121 }
2122 };
2123
2124 self.map.insert(id.clone(), 1);
2125 id
2126 }
2127
2128 pub(crate) fn handle_footnotes<'a, T, F: FnOnce(&'a mut Self, Weak<AtomicUsize>) -> T>(
2131 &'a mut self,
2132 closure: F,
2133 ) -> T {
2134 let existing_footnotes = Arc::downgrade(&self.existing_footnotes);
2135
2136 closure(self, existing_footnotes)
2137 }
2138
2139 pub(crate) fn clear(&mut self) {
2140 self.map.clear();
2141 self.existing_footnotes = Arc::new(AtomicUsize::new(0));
2142 }
2143}