1use std::mem;
2use std::ops::Range;
3
4use itertools::Itertools;
5use pulldown_cmark::{
6 BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag,
7};
8use rustc_ast as ast;
9use rustc_ast::attr::AttributeExt;
10use rustc_ast::join_path_syms;
11use rustc_ast::util::comments::beautify_doc_string;
12use rustc_data_structures::fx::FxIndexMap;
13use rustc_data_structures::unord::UnordSet;
14use rustc_middle::ty::TyCtxt;
15use rustc_span::def_id::DefId;
16use rustc_span::source_map::SourceMap;
17use rustc_span::{DUMMY_SP, InnerSpan, Span, Symbol, sym};
18use thin_vec::ThinVec;
19use tracing::{debug, trace};
20
21#[cfg(test)]
22mod tests;
23
/// How a [`DocFragment`] was written in the source.
///
/// `attrs_to_doc_fragments` picks the kind from `attr.is_doc_comment()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DocFragmentKind {
    /// The attribute reported itself as a doc comment (`is_doc_comment()` was true).
    SugaredDoc,
    /// Any other attribute that carries a doc string.
    RawDoc,
}
31
32#[derive(Clone, PartialEq, Eq, Debug)]
41pub struct DocFragment {
42 pub span: Span,
43 pub item_id: Option<DefId>,
50 pub doc: Symbol,
51 pub kind: DocFragmentKind,
52 pub indent: usize,
53 pub from_expansion: bool,
56}
57
/// Reasons why `strip_generics_from_path` can reject a path.
#[derive(Debug, Clone, Copy)]
pub enum MalformedGenerics {
    /// A path segment does not close exactly as many angle brackets as it opens.
    UnbalancedAngleBrackets,
    /// Nothing is left of the path once the generics have been removed.
    MissingType,
    /// The text between a pair of angle brackets contains `" as "`.
    HasFullyQualifiedSyntax,
    /// A `:` is not immediately followed by a second `:`.
    InvalidPathSeparator,
    /// A `<` is immediately followed by another `<`.
    TooManyAngleBrackets,
    /// A `<` is immediately followed by `>`.
    EmptyAngleBrackets,
}
98
99pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
113 let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
126 && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
127 {
128 1
131 } else {
132 0
133 };
134
135 let Some(min_indent) = docs
145 .iter()
146 .map(|fragment| {
147 fragment
148 .doc
149 .as_str()
150 .lines()
151 .filter(|line| line.chars().any(|c| !c.is_whitespace()))
152 .map(|line| {
153 let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
156 whitespace
157 + (if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add })
158 })
159 .min()
160 .unwrap_or(usize::MAX)
161 })
162 .min()
163 else {
164 return;
165 };
166
167 for fragment in docs {
168 if fragment.doc == sym::empty {
169 continue;
170 }
171
172 let indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
173 min_indent - add
174 } else {
175 min_indent
176 };
177
178 fragment.indent = indent;
179 }
180}
181
182pub fn add_doc_fragment(out: &mut String, frag: &DocFragment) {
188 if frag.doc == sym::empty {
189 out.push('\n');
190 return;
191 }
192 let s = frag.doc.as_str();
193 let mut iter = s.lines();
194
195 while let Some(line) = iter.next() {
196 if line.chars().any(|c| !c.is_whitespace()) {
197 assert!(line.len() >= frag.indent);
198 out.push_str(&line[frag.indent..]);
199 } else {
200 out.push_str(line);
201 }
202 out.push('\n');
203 }
204}
205
206pub fn attrs_to_doc_fragments<'a, A: AttributeExt + Clone + 'a>(
207 attrs: impl Iterator<Item = (&'a A, Option<DefId>)>,
208 doc_only: bool,
209) -> (Vec<DocFragment>, ThinVec<A>) {
210 let (min_size, max_size) = attrs.size_hint();
211 let size_hint = max_size.unwrap_or(min_size);
212 let mut doc_fragments = Vec::with_capacity(size_hint);
213 let mut other_attrs = ThinVec::<A>::with_capacity(if doc_only { 0 } else { size_hint });
214 for (attr, item_id) in attrs {
215 if let Some((doc_str, comment_kind)) = attr.doc_str_and_comment_kind() {
216 let doc = beautify_doc_string(doc_str, comment_kind);
217 let (span, kind, from_expansion) = if attr.is_doc_comment() {
218 let span = attr.span();
219 (span, DocFragmentKind::SugaredDoc, span.from_expansion())
220 } else {
221 let attr_span = attr.span();
222 let (span, from_expansion) = match attr.value_span() {
223 Some(sp) => (sp.with_ctxt(attr_span.ctxt()), sp.from_expansion()),
224 None => (attr_span, attr_span.from_expansion()),
225 };
226 (span, DocFragmentKind::RawDoc, from_expansion)
227 };
228 let fragment = DocFragment { span, doc, kind, item_id, indent: 0, from_expansion };
229 doc_fragments.push(fragment);
230 } else if !doc_only {
231 other_attrs.push(attr.clone());
232 }
233 }
234
235 doc_fragments.shrink_to_fit();
236 other_attrs.shrink_to_fit();
237
238 unindent_doc_fragments(&mut doc_fragments);
239
240 (doc_fragments, other_attrs)
241}
242
243pub fn prepare_to_doc_link_resolution(
249 doc_fragments: &[DocFragment],
250) -> FxIndexMap<Option<DefId>, String> {
251 let mut res = FxIndexMap::default();
252 for fragment in doc_fragments {
253 let out_str = res.entry(fragment.item_id).or_default();
254 add_doc_fragment(out_str, fragment);
255 }
256 res
257}
258
259pub fn main_body_opts() -> Options {
261 Options::ENABLE_TABLES
262 | Options::ENABLE_FOOTNOTES
263 | Options::ENABLE_STRIKETHROUGH
264 | Options::ENABLE_TASKLISTS
265 | Options::ENABLE_SMART_PUNCTUATION
266}
267
268fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<Symbol, MalformedGenerics> {
269 let mut stripped_segment = String::new();
270 let mut param_depth = 0;
271
272 let mut latest_generics_chunk = String::new();
273
274 for c in segment {
275 if c == '<' {
276 param_depth += 1;
277 latest_generics_chunk.clear();
278 } else if c == '>' {
279 param_depth -= 1;
280 if latest_generics_chunk.contains(" as ") {
281 return Err(MalformedGenerics::HasFullyQualifiedSyntax);
284 }
285 } else if param_depth == 0 {
286 stripped_segment.push(c);
287 } else {
288 latest_generics_chunk.push(c);
289 }
290 }
291
292 if param_depth == 0 {
293 Ok(Symbol::intern(&stripped_segment))
294 } else {
295 Err(MalformedGenerics::UnbalancedAngleBrackets)
297 }
298}
299
300pub fn strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGenerics> {
301 if !path_str.contains(['<', '>']) {
302 return Ok(path_str.into());
303 }
304 let mut stripped_segments = vec![];
305 let mut path = path_str.chars().peekable();
306 let mut segment = Vec::new();
307
308 while let Some(chr) = path.next() {
309 match chr {
310 ':' => {
311 if path.next_if_eq(&':').is_some() {
312 let stripped_segment =
313 strip_generics_from_path_segment(mem::take(&mut segment))?;
314 if !stripped_segment.is_empty() {
315 stripped_segments.push(stripped_segment);
316 }
317 } else {
318 return Err(MalformedGenerics::InvalidPathSeparator);
319 }
320 }
321 '<' => {
322 segment.push(chr);
323
324 match path.next() {
325 Some('<') => {
326 return Err(MalformedGenerics::TooManyAngleBrackets);
327 }
328 Some('>') => {
329 return Err(MalformedGenerics::EmptyAngleBrackets);
330 }
331 Some(chr) => {
332 segment.push(chr);
333
334 while let Some(chr) = path.next_if(|c| *c != '>') {
335 segment.push(chr);
336 }
337 }
338 None => break,
339 }
340 }
341 _ => segment.push(chr),
342 }
343 trace!("raw segment: {:?}", segment);
344 }
345
346 if !segment.is_empty() {
347 let stripped_segment = strip_generics_from_path_segment(segment)?;
348 if !stripped_segment.is_empty() {
349 stripped_segments.push(stripped_segment);
350 }
351 }
352
353 debug!("path_str: {path_str:?}\nstripped segments: {stripped_segments:?}");
354
355 if !stripped_segments.is_empty() {
356 let stripped_path = join_path_syms(stripped_segments);
357 Ok(stripped_path.into())
358 } else {
359 Err(MalformedGenerics::MissingType)
360 }
361}
362
363pub fn inner_docs(attrs: &[impl AttributeExt]) -> bool {
368 for attr in attrs {
369 if let Some(attr_style) = attr.doc_resolution_scope() {
370 return attr_style == ast::AttrStyle::Inner;
371 }
372 }
373 true
374}
375
376pub fn has_primitive_or_keyword_or_attribute_docs(attrs: &[impl AttributeExt]) -> bool {
378 for attr in attrs {
379 if attr.has_name(sym::rustc_doc_primitive) {
380 return true;
381 } else if attr.has_name(sym::doc)
382 && let Some(items) = attr.meta_item_list()
383 {
384 for item in items {
385 if item.has_name(sym::keyword) || item.has_name(sym::attribute) {
386 return true;
387 }
388 }
389 }
390 }
391 false
392}
393
394fn preprocess_link(link: &str) -> Box<str> {
398 let link = link.replace('`', "");
399 let link = link.split('#').next().unwrap();
400 let link = link.trim();
401 let link = link.rsplit('@').next().unwrap();
402 let link = link.strip_suffix("()").unwrap_or(link);
403 let link = link.strip_suffix("{}").unwrap_or(link);
404 let link = link.strip_suffix("[]").unwrap_or(link);
405 let link = if link != "!" { link.strip_suffix('!').unwrap_or(link) } else { link };
406 let link = link.trim();
407 strip_generics_from_path(link).unwrap_or_else(|_| link.into())
408}
409
410pub fn may_be_doc_link(link_type: LinkType) -> bool {
413 match link_type {
414 LinkType::Inline
415 | LinkType::Reference
416 | LinkType::ReferenceUnknown
417 | LinkType::Collapsed
418 | LinkType::CollapsedUnknown
419 | LinkType::Shortcut
420 | LinkType::ShortcutUnknown => true,
421 LinkType::Autolink | LinkType::Email => false,
422 }
423}
424
425pub(crate) fn attrs_to_preprocessed_links<A: AttributeExt + Clone>(attrs: &[A]) -> Vec<Box<str>> {
428 let (doc_fragments, _) = attrs_to_doc_fragments(attrs.iter().map(|attr| (attr, None)), true);
429 let doc = prepare_to_doc_link_resolution(&doc_fragments).into_values().next().unwrap();
430
431 parse_links(&doc)
432}
433
434fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
437 let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
438 let mut event_iter = Parser::new_with_broken_link_callback(
439 doc,
440 main_body_opts(),
441 Some(&mut broken_link_callback),
442 );
443 let mut links = Vec::new();
444
445 let mut refids = UnordSet::default();
446
447 while let Some(event) = event_iter.next() {
448 match event {
449 Event::Start(Tag::Link { link_type, dest_url, title: _, id })
450 if may_be_doc_link(link_type) =>
451 {
452 if matches!(
453 link_type,
454 LinkType::Inline
455 | LinkType::ReferenceUnknown
456 | LinkType::Reference
457 | LinkType::Shortcut
458 | LinkType::ShortcutUnknown
459 ) {
460 if let Some(display_text) = collect_link_data(&mut event_iter) {
461 links.push(display_text);
462 }
463 }
464 if matches!(
465 link_type,
466 LinkType::Reference | LinkType::Shortcut | LinkType::Collapsed
467 ) {
468 refids.insert(id);
469 }
470
471 links.push(preprocess_link(&dest_url));
472 }
473 _ => {}
474 }
475 }
476
477 for (label, refdef) in event_iter.reference_definitions().iter().sorted_by_key(|x| x.0) {
478 if !refids.contains(label) {
479 links.push(preprocess_link(&refdef.dest));
480 }
481 }
482
483 links
484}
485
486fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
488 event_iter: &mut Parser<'input, F>,
489) -> Option<Box<str>> {
490 let mut display_text: Option<String> = None;
491 let mut append_text = |text: CowStr<'_>| {
492 if let Some(display_text) = &mut display_text {
493 display_text.push_str(&text);
494 } else {
495 display_text = Some(text.to_string());
496 }
497 };
498
499 while let Some(event) = event_iter.next() {
500 match event {
501 Event::Text(text) => {
502 append_text(text);
503 }
504 Event::Code(code) => {
505 append_text(code);
506 }
507 Event::End(_) => {
508 break;
509 }
510 _ => {}
511 }
512 }
513
514 display_text.map(String::into_boxed_str)
515}
516
517pub fn span_of_fragments(fragments: &[DocFragment]) -> Option<Span> {
519 let (first_fragment, last_fragment) = match fragments {
520 [] => return None,
521 [first, .., last] => (first, last),
522 [first] => (first, first),
523 };
524 if first_fragment.span == DUMMY_SP {
525 return None;
526 }
527 Some(first_fragment.span.to(last_fragment.span))
528}
529
530pub fn source_span_for_markdown_range(
552 tcx: TyCtxt<'_>,
553 markdown: &str,
554 md_range: &Range<usize>,
555 fragments: &[DocFragment],
556) -> Option<(Span, bool)> {
557 let map = tcx.sess.source_map();
558 source_span_for_markdown_range_inner(map, markdown, md_range, fragments)
559}
560
561pub fn source_span_for_markdown_range_inner(
563 map: &SourceMap,
564 markdown: &str,
565 md_range: &Range<usize>,
566 fragments: &[DocFragment],
567) -> Option<(Span, bool)> {
568 use rustc_span::BytePos;
569
570 if let &[fragment] = &fragments
571 && fragment.kind == DocFragmentKind::RawDoc
572 && let Ok(snippet) = map.span_to_snippet(fragment.span)
573 && snippet.trim_end() == markdown.trim_end()
574 && let Ok(md_range_lo) = u32::try_from(md_range.start)
575 && let Ok(md_range_hi) = u32::try_from(md_range.end)
576 {
577 return Some((
579 Span::new(
580 fragment.span.lo() + rustc_span::BytePos(md_range_lo),
581 fragment.span.lo() + rustc_span::BytePos(md_range_hi),
582 fragment.span.ctxt(),
583 fragment.span.parent(),
584 ),
585 fragment.from_expansion,
586 ));
587 }
588
589 let is_all_sugared_doc = fragments.iter().all(|frag| frag.kind == DocFragmentKind::SugaredDoc);
590
591 if !is_all_sugared_doc {
592 let mut match_data = None;
597 let pat = &markdown[md_range.clone()];
598 if pat.is_empty() {
600 return None;
601 }
602 for (i, fragment) in fragments.iter().enumerate() {
603 if let Ok(snippet) = map.span_to_snippet(fragment.span)
604 && let Some(match_start) = snippet.find(pat)
605 {
606 if match_data.is_none()
611 && !snippet.as_bytes()[match_start + 1..]
612 .windows(pat.len())
613 .any(|s| s == pat.as_bytes())
614 {
615 match_data = Some((i, match_start));
616 } else {
617 return None;
619 }
620 }
621 }
622 if let Some((i, match_start)) = match_data {
623 let fragment = &fragments[i];
624 let sp = fragment.span;
625 let lo = sp.lo() + BytePos(match_start as u32);
628 return Some((
629 sp.with_lo(lo).with_hi(lo + BytePos((md_range.end - md_range.start) as u32)),
630 fragment.from_expansion,
631 ));
632 }
633 return None;
634 }
635
636 let snippet = map.span_to_snippet(span_of_fragments(fragments)?).ok()?;
637
638 let starting_line = markdown[..md_range.start].matches('\n').count();
639 let ending_line = starting_line + markdown[md_range.start..md_range.end].matches('\n').count();
640
641 let mut src_lines = snippet.split_terminator('\n');
644 let md_lines = markdown.split_terminator('\n');
645
646 let mut start_bytes = 0;
649 let mut end_bytes = 0;
650
651 'outer: for (line_no, md_line) in md_lines.enumerate() {
652 loop {
653 let source_line = src_lines.next()?;
654 match source_line.find(md_line) {
655 Some(offset) => {
656 if line_no == starting_line {
657 start_bytes += offset;
658
659 if starting_line == ending_line {
660 break 'outer;
661 }
662 } else if line_no == ending_line {
663 end_bytes += offset;
664 break 'outer;
665 } else if line_no < starting_line {
666 start_bytes += source_line.len() - md_line.len();
667 } else {
668 end_bytes += source_line.len() - md_line.len();
669 }
670 break;
671 }
672 None => {
673 if line_no <= starting_line {
676 start_bytes += source_line.len() + 1;
677 } else {
678 end_bytes += source_line.len() + 1;
679 }
680 }
681 }
682 }
683 }
684
685 let span = span_of_fragments(fragments)?;
686 let src_span = span.from_inner(InnerSpan::new(
687 md_range.start + start_bytes,
688 md_range.end + start_bytes + end_bytes,
689 ));
690 Some((
691 src_span,
692 fragments.iter().any(|frag| frag.span.overlaps(src_span) && frag.from_expansion),
693 ))
694}