1use std::mem;
2use std::ops::Range;
3
4use itertools::Itertools;
5pub use pulldown_cmark;
7use pulldown_cmark::{
8 BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag,
9};
10use rustc_ast as ast;
11use rustc_ast::attr::AttributeExt;
12use rustc_ast::join_path_syms;
13use rustc_ast::util::comments::beautify_doc_string;
14use rustc_data_structures::fx::FxIndexMap;
15use rustc_data_structures::unord::UnordSet;
16use rustc_middle::ty::TyCtxt;
17use rustc_span::def_id::DefId;
18use rustc_span::source_map::SourceMap;
19use rustc_span::{DUMMY_SP, InnerSpan, Span, Symbol, sym};
20use thin_vec::ThinVec;
21use tracing::{debug, trace};
22
23#[cfg(test)]
24mod tests;
25
/// Distinguishes how a [`DocFragment`] was written in the source.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum DocFragmentKind {
    /// The fragment comes from a sugared doc comment: `attrs_to_doc_fragments` assigns this
    /// kind when `is_doc_comment()` returns a span for the attribute.
    SugaredDoc,
    /// The fragment comes from any other attribute that carries documentation text
    /// (presumably a raw `#[doc = "..."]` attribute); this is the fallback kind in
    /// `attrs_to_doc_fragments`.
    RawDoc,
}
33
/// One piece of an item's documentation, as produced from a single attribute by
/// `attrs_to_doc_fragments`.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct DocFragment {
    /// Source span of the fragment.
    pub span: Span,
    /// Optional id associated with the fragment. `prepare_to_doc_link_resolution` groups the
    /// rendered text of fragments by this value.
    pub item_id: Option<DefId>,
    /// The documentation text of the fragment.
    pub doc: Symbol,
    /// Whether the fragment was written as a sugared doc comment or as another doc attribute.
    pub kind: DocFragmentKind,
    /// Number of leading bytes `add_doc_fragment` removes from every non-blank line.
    /// Starts at `0` and is filled in by `unindent_doc_fragments`.
    pub indent: usize,
    /// Whether the span this fragment was taken from reports `from_expansion()`.
    pub from_expansion: bool,
}
59
/// Reasons why `strip_generics_from_path` can reject a path.
#[derive(Clone, Copy, Debug)]
pub enum MalformedGenerics {
    /// A segment's `<` and `>` do not balance out, e.g. `Vec<T` or `Vec<T>>`.
    UnbalancedAngleBrackets,
    /// Nothing is left once the generics are removed, e.g. `<T>`.
    MissingType,
    /// The text between a pair of angle brackets contains ` as `,
    /// e.g. `<Vec as IntoIterator>::into_iter`.
    HasFullyQualifiedSyntax,
    /// A `:` that is not immediately followed by another `:`, e.g. `Vec<T>:new`.
    InvalidPathSeparator,
    /// A `<` immediately followed by another `<`, e.g. `Vec<<T>>`.
    TooManyAngleBrackets,
    /// A `<` immediately followed by `>`, e.g. `Vec<>`.
    EmptyAngleBrackets,
}
100
101pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
115 let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
128 && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
129 {
130 1
133 } else {
134 0
135 };
136
137 let Some(min_indent) = docs
147 .iter()
148 .map(|fragment| {
149 fragment
150 .doc
151 .as_str()
152 .lines()
153 .filter(|line| line.chars().any(|c| !c.is_whitespace()))
154 .map(|line| {
155 let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
158 whitespace
159 + (if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add })
160 })
161 .min()
162 .unwrap_or(usize::MAX)
163 })
164 .min()
165 else {
166 return;
167 };
168
169 for fragment in docs {
170 if fragment.doc == sym::empty {
171 continue;
172 }
173
174 let indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
175 min_indent - add
176 } else {
177 min_indent
178 };
179
180 fragment.indent = indent;
181 }
182}
183
184pub fn add_doc_fragment(out: &mut String, frag: &DocFragment) {
190 if frag.doc == sym::empty {
191 out.push('\n');
192 return;
193 }
194 let s = frag.doc.as_str();
195 let mut iter = s.lines();
196
197 while let Some(line) = iter.next() {
198 if line.chars().any(|c| !c.is_whitespace()) {
199 assert!(line.len() >= frag.indent);
200 out.push_str(&line[frag.indent..]);
201 } else {
202 out.push_str(line);
203 }
204 out.push('\n');
205 }
206}
207
208pub fn attrs_to_doc_fragments<'a, A: AttributeExt + Clone + 'a>(
209 attrs: impl Iterator<Item = (&'a A, Option<DefId>)>,
210 doc_only: bool,
211) -> (Vec<DocFragment>, ThinVec<A>) {
212 let (min_size, max_size) = attrs.size_hint();
213 let size_hint = max_size.unwrap_or(min_size);
214 let mut doc_fragments = Vec::with_capacity(size_hint);
215 let mut other_attrs = ThinVec::<A>::with_capacity(if doc_only { 0 } else { size_hint });
216 for (attr, item_id) in attrs {
217 if let Some((doc_str, comment_kind)) = attr.doc_str_and_comment_kind() {
218 let doc = beautify_doc_string(doc_str, comment_kind);
219 let (span, kind, from_expansion) = if let Some(span) = attr.is_doc_comment() {
220 (span, DocFragmentKind::SugaredDoc, span.from_expansion())
221 } else {
222 let attr_span = attr.span();
223 let (span, from_expansion) = match attr.value_span() {
224 Some(sp) => (sp.with_ctxt(attr_span.ctxt()), sp.from_expansion()),
225 None => (attr_span, attr_span.from_expansion()),
226 };
227 (span, DocFragmentKind::RawDoc, from_expansion)
228 };
229 let fragment = DocFragment { span, doc, kind, item_id, indent: 0, from_expansion };
230 doc_fragments.push(fragment);
231 } else if !doc_only {
232 other_attrs.push(attr.clone());
233 }
234 }
235
236 doc_fragments.shrink_to_fit();
237 other_attrs.shrink_to_fit();
238
239 unindent_doc_fragments(&mut doc_fragments);
240
241 (doc_fragments, other_attrs)
242}
243
244pub fn prepare_to_doc_link_resolution(
250 doc_fragments: &[DocFragment],
251) -> FxIndexMap<Option<DefId>, String> {
252 let mut res = FxIndexMap::default();
253 for fragment in doc_fragments {
254 let out_str = res.entry(fragment.item_id).or_default();
255 add_doc_fragment(out_str, fragment);
256 }
257 res
258}
259
260pub fn main_body_opts() -> Options {
262 Options::ENABLE_TABLES
263 | Options::ENABLE_FOOTNOTES
264 | Options::ENABLE_STRIKETHROUGH
265 | Options::ENABLE_TASKLISTS
266 | Options::ENABLE_SMART_PUNCTUATION
267}
268
269fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<Symbol, MalformedGenerics> {
270 let mut stripped_segment = String::new();
271 let mut param_depth = 0;
272
273 let mut latest_generics_chunk = String::new();
274
275 for c in segment {
276 if c == '<' {
277 param_depth += 1;
278 latest_generics_chunk.clear();
279 } else if c == '>' {
280 param_depth -= 1;
281 if latest_generics_chunk.contains(" as ") {
282 return Err(MalformedGenerics::HasFullyQualifiedSyntax);
285 }
286 } else if param_depth == 0 {
287 stripped_segment.push(c);
288 } else {
289 latest_generics_chunk.push(c);
290 }
291 }
292
293 if param_depth == 0 {
294 Ok(Symbol::intern(&stripped_segment))
295 } else {
296 Err(MalformedGenerics::UnbalancedAngleBrackets)
298 }
299}
300
301pub fn strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGenerics> {
302 if !path_str.contains(['<', '>']) {
303 return Ok(path_str.into());
304 }
305 let mut stripped_segments = vec![];
306 let mut path = path_str.chars().peekable();
307 let mut segment = Vec::new();
308
309 while let Some(chr) = path.next() {
310 match chr {
311 ':' => {
312 if path.next_if_eq(&':').is_some() {
313 let stripped_segment =
314 strip_generics_from_path_segment(mem::take(&mut segment))?;
315 if !stripped_segment.is_empty() {
316 stripped_segments.push(stripped_segment);
317 }
318 } else {
319 return Err(MalformedGenerics::InvalidPathSeparator);
320 }
321 }
322 '<' => {
323 segment.push(chr);
324
325 match path.next() {
326 Some('<') => {
327 return Err(MalformedGenerics::TooManyAngleBrackets);
328 }
329 Some('>') => {
330 return Err(MalformedGenerics::EmptyAngleBrackets);
331 }
332 Some(chr) => {
333 segment.push(chr);
334
335 while let Some(chr) = path.next_if(|c| *c != '>') {
336 segment.push(chr);
337 }
338 }
339 None => break,
340 }
341 }
342 _ => segment.push(chr),
343 }
344 trace!("raw segment: {:?}", segment);
345 }
346
347 if !segment.is_empty() {
348 let stripped_segment = strip_generics_from_path_segment(segment)?;
349 if !stripped_segment.is_empty() {
350 stripped_segments.push(stripped_segment);
351 }
352 }
353
354 debug!("path_str: {path_str:?}\nstripped segments: {stripped_segments:?}");
355
356 if !stripped_segments.is_empty() {
357 let stripped_path = join_path_syms(stripped_segments);
358 Ok(stripped_path.into())
359 } else {
360 Err(MalformedGenerics::MissingType)
361 }
362}
363
364pub fn inner_docs(attrs: &[impl AttributeExt]) -> bool {
369 for attr in attrs {
370 if let Some(attr_style) = attr.doc_resolution_scope() {
371 return attr_style == ast::AttrStyle::Inner;
372 }
373 }
374 true
375}
376
377pub fn has_primitive_or_keyword_or_attribute_docs(attrs: &[impl AttributeExt]) -> bool {
379 for attr in attrs {
380 if attr.has_name(sym::rustc_doc_primitive) {
381 return true;
382 } else if attr.has_name(sym::doc)
383 && let Some(items) = attr.meta_item_list()
384 {
385 for item in items {
386 if item.has_name(sym::keyword) || item.has_name(sym::attribute) {
387 return true;
388 }
389 }
390 }
391 }
392 false
393}
394
395fn preprocess_link(link: &str) -> Box<str> {
399 let link = link.replace('`', "");
400 let link = link.split('#').next().unwrap();
401 let link = link.trim();
402 let link = link.rsplit('@').next().unwrap();
403 let link = link.strip_suffix("()").unwrap_or(link);
404 let link = link.strip_suffix("{}").unwrap_or(link);
405 let link = link.strip_suffix("[]").unwrap_or(link);
406 let link = if link != "!" { link.strip_suffix('!').unwrap_or(link) } else { link };
407 let link = link.trim();
408 strip_generics_from_path(link).unwrap_or_else(|_| link.into())
409}
410
411pub fn may_be_doc_link(link_type: LinkType) -> bool {
414 match link_type {
415 LinkType::Inline
416 | LinkType::Reference
417 | LinkType::ReferenceUnknown
418 | LinkType::Collapsed
419 | LinkType::CollapsedUnknown
420 | LinkType::Shortcut
421 | LinkType::ShortcutUnknown => true,
422 LinkType::Autolink | LinkType::Email => false,
423 }
424}
425
426pub(crate) fn attrs_to_preprocessed_links<A: AttributeExt + Clone>(attrs: &[A]) -> Vec<Box<str>> {
429 let (doc_fragments, _) = attrs_to_doc_fragments(attrs.iter().map(|attr| (attr, None)), true);
430 let doc = prepare_to_doc_link_resolution(&doc_fragments).into_values().next().unwrap();
431
432 parse_links(&doc)
433}
434
435fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
438 let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
439 let mut event_iter = Parser::new_with_broken_link_callback(
440 doc,
441 main_body_opts(),
442 Some(&mut broken_link_callback),
443 );
444 let mut links = Vec::new();
445
446 let mut refids = UnordSet::default();
447
448 while let Some(event) = event_iter.next() {
449 match event {
450 Event::Start(Tag::Link { link_type, dest_url, title: _, id })
451 if may_be_doc_link(link_type) =>
452 {
453 if matches!(
454 link_type,
455 LinkType::Inline
456 | LinkType::ReferenceUnknown
457 | LinkType::Reference
458 | LinkType::Shortcut
459 | LinkType::ShortcutUnknown
460 ) {
461 if let Some(display_text) = collect_link_data(&mut event_iter) {
462 links.push(display_text);
463 }
464 }
465 if matches!(
466 link_type,
467 LinkType::Reference | LinkType::Shortcut | LinkType::Collapsed
468 ) {
469 refids.insert(id);
470 }
471
472 links.push(preprocess_link(&dest_url));
473 }
474 _ => {}
475 }
476 }
477
478 for (label, refdef) in event_iter.reference_definitions().iter().sorted_by_key(|x| x.0) {
479 if !refids.contains(label) {
480 links.push(preprocess_link(&refdef.dest));
481 }
482 }
483
484 links
485}
486
487fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
489 event_iter: &mut Parser<'input, F>,
490) -> Option<Box<str>> {
491 let mut display_text: Option<String> = None;
492 let mut append_text = |text: CowStr<'_>| {
493 if let Some(display_text) = &mut display_text {
494 display_text.push_str(&text);
495 } else {
496 display_text = Some(text.to_string());
497 }
498 };
499
500 while let Some(event) = event_iter.next() {
501 match event {
502 Event::Text(text) => {
503 append_text(text);
504 }
505 Event::Code(code) => {
506 append_text(code);
507 }
508 Event::End(_) => {
509 break;
510 }
511 _ => {}
512 }
513 }
514
515 display_text.map(String::into_boxed_str)
516}
517
518pub fn span_of_fragments(fragments: &[DocFragment]) -> Option<Span> {
520 let (first_fragment, last_fragment) = match fragments {
521 [] => return None,
522 [first, .., last] => (first, last),
523 [first] => (first, first),
524 };
525 if first_fragment.span == DUMMY_SP {
526 return None;
527 }
528 Some(first_fragment.span.to(last_fragment.span))
529}
530
531pub fn source_span_for_markdown_range(
553 tcx: TyCtxt<'_>,
554 markdown: &str,
555 md_range: &Range<usize>,
556 fragments: &[DocFragment],
557) -> Option<(Span, bool)> {
558 let map = tcx.sess.source_map();
559 source_span_for_markdown_range_inner(map, markdown, md_range, fragments)
560}
561
562pub fn source_span_for_markdown_range_inner(
564 map: &SourceMap,
565 markdown: &str,
566 md_range: &Range<usize>,
567 fragments: &[DocFragment],
568) -> Option<(Span, bool)> {
569 use rustc_span::BytePos;
570
571 if let &[fragment] = &fragments
572 && fragment.kind == DocFragmentKind::RawDoc
573 && let Ok(snippet) = map.span_to_snippet(fragment.span)
574 && snippet.trim_end() == markdown.trim_end()
575 && let Ok(md_range_lo) = u32::try_from(md_range.start)
576 && let Ok(md_range_hi) = u32::try_from(md_range.end)
577 {
578 return Some((
580 Span::new(
581 fragment.span.lo() + rustc_span::BytePos(md_range_lo),
582 fragment.span.lo() + rustc_span::BytePos(md_range_hi),
583 fragment.span.ctxt(),
584 fragment.span.parent(),
585 ),
586 fragment.from_expansion,
587 ));
588 }
589
590 let is_all_sugared_doc = fragments.iter().all(|frag| frag.kind == DocFragmentKind::SugaredDoc);
591
592 if !is_all_sugared_doc {
593 let mut match_data = None;
598 let pat = &markdown[md_range.clone()];
599 if pat.is_empty() {
601 return None;
602 }
603 for (i, fragment) in fragments.iter().enumerate() {
604 if let Ok(snippet) = map.span_to_snippet(fragment.span)
605 && let Some(match_start) = snippet.find(pat)
606 {
607 if match_data.is_none()
612 && !snippet.as_bytes()[match_start + 1..]
613 .windows(pat.len())
614 .any(|s| s == pat.as_bytes())
615 {
616 match_data = Some((i, match_start));
617 } else {
618 return None;
620 }
621 }
622 }
623 if let Some((i, match_start)) = match_data {
624 let fragment = &fragments[i];
625 let sp = fragment.span;
626 let lo = sp.lo() + BytePos(match_start as u32);
629 return Some((
630 sp.with_lo(lo).with_hi(lo + BytePos((md_range.end - md_range.start) as u32)),
631 fragment.from_expansion,
632 ));
633 }
634 return None;
635 }
636
637 let snippet = map.span_to_snippet(span_of_fragments(fragments)?).ok()?;
638
639 let starting_line = markdown[..md_range.start].matches('\n').count();
640 let ending_line = starting_line + markdown[md_range.start..md_range.end].matches('\n').count();
641
642 let mut src_lines = snippet.split_terminator('\n');
645 let md_lines = markdown.split_terminator('\n');
646
647 let mut start_bytes = 0;
650 let mut end_bytes = 0;
651
652 'outer: for (line_no, md_line) in md_lines.enumerate() {
653 loop {
654 let source_line = src_lines.next()?;
655 match source_line.find(md_line) {
656 Some(offset) => {
657 if line_no == starting_line {
658 start_bytes += offset;
659
660 if starting_line == ending_line {
661 break 'outer;
662 }
663 } else if line_no == ending_line {
664 end_bytes += offset;
665 break 'outer;
666 } else if line_no < starting_line {
667 start_bytes += source_line.len() - md_line.len();
668 } else {
669 end_bytes += source_line.len() - md_line.len();
670 }
671 break;
672 }
673 None => {
674 if line_no <= starting_line {
677 start_bytes += source_line.len() + 1;
678 } else {
679 end_bytes += source_line.len() + 1;
680 }
681 }
682 }
683 }
684 }
685
686 let span = span_of_fragments(fragments)?;
687 let src_span = span.from_inner(InnerSpan::new(
688 md_range.start + start_bytes,
689 md_range.end + start_bytes + end_bytes,
690 ));
691 Some((
692 src_span,
693 fragments.iter().any(|frag| frag.span.overlaps(src_span) && frag.from_expansion),
694 ))
695}