1use std::mem;
2use std::ops::Range;
3
4use itertools::Itertools;
5pub use pulldown_cmark;
7use pulldown_cmark::{
8 BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag,
9};
10use rustc_ast as ast;
11use rustc_ast::attr::AttributeExt;
12use rustc_ast::join_path_syms;
13use rustc_ast::token::DocFragmentKind;
14use rustc_ast::util::comments::beautify_doc_string;
15use rustc_data_structures::fx::FxIndexMap;
16use rustc_data_structures::unord::UnordSet;
17use rustc_middle::ty::TyCtxt;
18use rustc_span::def_id::DefId;
19use rustc_span::source_map::SourceMap;
20use rustc_span::{DUMMY_SP, InnerSpan, Span, Symbol, sym};
21use thin_vec::ThinVec;
22use tracing::{debug, trace};
23
24#[cfg(test)]
25mod tests;
26
/// One piece of an item's documentation, built from a single doc attribute.
///
/// Fragments are produced by `attrs_to_doc_fragments` and turned back into markdown text by
/// `add_doc_fragment`.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct DocFragment {
    /// Source location of the fragment. For sugared fragments this is the attribute's span; for
    /// raw fragments it is the span of the value, given the attribute's syntax context.
    pub span: Span,
    /// The id supplied by the caller together with the attribute, if any.
    /// `prepare_to_doc_link_resolution` groups the text of fragments by this id.
    pub item_id: Option<DefId>,
    /// The documentation text, after `beautify_doc_string` has been applied.
    pub doc: Symbol,
    /// The fragment kind reported by the attribute (sugared or raw).
    pub kind: DocFragmentKind,
    /// Number of leading bytes `add_doc_fragment` removes from every non-blank line.
    /// Starts at `0` and is filled in by `unindent_doc_fragments`.
    pub indent: usize,
    /// Whether the fragment comes from a macro expansion. It is computed when the fragment is
    /// created; for raw fragments it is taken from the value span before that span's syntax
    /// context is replaced.
    pub from_expansion: bool,
}
52
/// The ways in which `strip_generics_from_path` can reject a path string.
#[derive(Clone, Copy, Debug)]
pub enum MalformedGenerics {
    /// The `<` and `>` of a path segment do not balance once the whole segment has been read.
    /// This includes a path that ends directly after a `<`.
    UnbalancedAngleBrackets,
    /// No non-empty segment is left after the generics have been removed.
    MissingType,
    /// The text between a `<` and the following `>` contains ` as `, which looks like
    /// fully-qualified syntax (for example `<T as Trait>`).
    HasFullyQualifiedSyntax,
    /// A `:` is not immediately followed by a second `:`.
    InvalidPathSeparator,
    /// A `<` is immediately followed by another `<`.
    TooManyAngleBrackets,
    /// A `<` is immediately followed by a `>`.
    EmptyAngleBrackets,
}
93
94pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
108 let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
121 && docs.iter().any(|d| d.kind.is_sugared())
122 {
123 1
126 } else {
127 0
128 };
129
130 let Some(min_indent) = docs
140 .iter()
141 .map(|fragment| {
142 fragment
143 .doc
144 .as_str()
145 .lines()
146 .filter(|line| line.chars().any(|c| !c.is_whitespace()))
147 .map(|line| {
148 let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
151 whitespace + (if fragment.kind.is_sugared() { 0 } else { add })
152 })
153 .min()
154 .unwrap_or(usize::MAX)
155 })
156 .min()
157 else {
158 return;
159 };
160
161 for fragment in docs {
162 if fragment.doc == sym::empty {
163 continue;
164 }
165
166 let indent = if !fragment.kind.is_sugared() && min_indent > 0 {
167 min_indent - add
168 } else {
169 min_indent
170 };
171
172 fragment.indent = indent;
173 }
174}
175
176pub fn add_doc_fragment(out: &mut String, frag: &DocFragment) {
182 if frag.doc == sym::empty {
183 out.push('\n');
184 return;
185 }
186 let s = frag.doc.as_str();
187 let mut iter = s.lines();
188
189 while let Some(line) = iter.next() {
190 if line.chars().any(|c| !c.is_whitespace()) {
191 assert!(line.len() >= frag.indent);
192 out.push_str(&line[frag.indent..]);
193 } else {
194 out.push_str(line);
195 }
196 out.push('\n');
197 }
198}
199
200pub fn attrs_to_doc_fragments<'a, A: AttributeExt + Clone + 'a>(
201 attrs: impl Iterator<Item = (&'a A, Option<DefId>)>,
202 doc_only: bool,
203) -> (Vec<DocFragment>, ThinVec<A>) {
204 let (min_size, max_size) = attrs.size_hint();
205 let size_hint = max_size.unwrap_or(min_size);
206 let mut doc_fragments = Vec::with_capacity(size_hint);
207 let mut other_attrs = ThinVec::<A>::with_capacity(if doc_only { 0 } else { size_hint });
208 for (attr, item_id) in attrs {
209 if let Some((doc_str, fragment_kind)) = attr.doc_str_and_fragment_kind() {
210 let doc = beautify_doc_string(doc_str, fragment_kind.comment_kind());
211 let attr_span = attr.span();
212 let (span, from_expansion) = match fragment_kind {
213 DocFragmentKind::Sugared(_) => (attr_span, attr_span.from_expansion()),
214 DocFragmentKind::Raw(value_span) => {
215 (value_span.with_ctxt(attr_span.ctxt()), value_span.from_expansion())
216 }
217 };
218 let fragment =
219 DocFragment { span, doc, kind: fragment_kind, item_id, indent: 0, from_expansion };
220 doc_fragments.push(fragment);
221 } else if !doc_only {
222 other_attrs.push(attr.clone());
223 }
224 }
225
226 doc_fragments.shrink_to_fit();
227 other_attrs.shrink_to_fit();
228
229 unindent_doc_fragments(&mut doc_fragments);
230
231 (doc_fragments, other_attrs)
232}
233
234pub fn prepare_to_doc_link_resolution(
240 doc_fragments: &[DocFragment],
241) -> FxIndexMap<Option<DefId>, String> {
242 let mut res = FxIndexMap::default();
243 for fragment in doc_fragments {
244 let out_str = res.entry(fragment.item_id).or_default();
245 add_doc_fragment(out_str, fragment);
246 }
247 res
248}
249
250pub fn main_body_opts() -> Options {
252 Options::ENABLE_TABLES
253 | Options::ENABLE_FOOTNOTES
254 | Options::ENABLE_STRIKETHROUGH
255 | Options::ENABLE_TASKLISTS
256 | Options::ENABLE_SMART_PUNCTUATION
257}
258
259fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<Symbol, MalformedGenerics> {
260 let mut stripped_segment = String::new();
261 let mut param_depth = 0;
262
263 let mut latest_generics_chunk = String::new();
264
265 for c in segment {
266 if c == '<' {
267 param_depth += 1;
268 latest_generics_chunk.clear();
269 } else if c == '>' {
270 param_depth -= 1;
271 if latest_generics_chunk.contains(" as ") {
272 return Err(MalformedGenerics::HasFullyQualifiedSyntax);
275 }
276 } else if param_depth == 0 {
277 stripped_segment.push(c);
278 } else {
279 latest_generics_chunk.push(c);
280 }
281 }
282
283 if param_depth == 0 {
284 Ok(Symbol::intern(&stripped_segment))
285 } else {
286 Err(MalformedGenerics::UnbalancedAngleBrackets)
288 }
289}
290
291pub fn strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGenerics> {
292 if !path_str.contains(['<', '>']) {
293 return Ok(path_str.into());
294 }
295 let mut stripped_segments = vec![];
296 let mut path = path_str.chars().peekable();
297 let mut segment = Vec::new();
298
299 while let Some(chr) = path.next() {
300 match chr {
301 ':' => {
302 if path.next_if_eq(&':').is_some() {
303 let stripped_segment =
304 strip_generics_from_path_segment(mem::take(&mut segment))?;
305 if !stripped_segment.is_empty() {
306 stripped_segments.push(stripped_segment);
307 }
308 } else {
309 return Err(MalformedGenerics::InvalidPathSeparator);
310 }
311 }
312 '<' => {
313 segment.push(chr);
314
315 match path.next() {
316 Some('<') => {
317 return Err(MalformedGenerics::TooManyAngleBrackets);
318 }
319 Some('>') => {
320 return Err(MalformedGenerics::EmptyAngleBrackets);
321 }
322 Some(chr) => {
323 segment.push(chr);
324
325 while let Some(chr) = path.next_if(|c| *c != '>') {
326 segment.push(chr);
327 }
328 }
329 None => break,
330 }
331 }
332 _ => segment.push(chr),
333 }
334 trace!("raw segment: {:?}", segment);
335 }
336
337 if !segment.is_empty() {
338 let stripped_segment = strip_generics_from_path_segment(segment)?;
339 if !stripped_segment.is_empty() {
340 stripped_segments.push(stripped_segment);
341 }
342 }
343
344 debug!("path_str: {path_str:?}\nstripped segments: {stripped_segments:?}");
345
346 if !stripped_segments.is_empty() {
347 let stripped_path = join_path_syms(stripped_segments);
348 Ok(stripped_path.into())
349 } else {
350 Err(MalformedGenerics::MissingType)
351 }
352}
353
354pub fn inner_docs(attrs: &[impl AttributeExt]) -> bool {
359 for attr in attrs {
360 if let Some(attr_style) = attr.doc_resolution_scope() {
361 return attr_style == ast::AttrStyle::Inner;
362 }
363 }
364 true
365}
366
367pub fn has_primitive_or_keyword_or_attribute_docs(attrs: &[impl AttributeExt]) -> bool {
369 for attr in attrs {
370 if attr.has_name(sym::rustc_doc_primitive) || attr.is_doc_keyword_or_attribute() {
371 return true;
372 }
373 }
374 false
375}
376
377fn preprocess_link(link: &str) -> Box<str> {
379 let link = link.replace('`', "");
384 let link = link.split('#').next().unwrap();
385 let link = link.trim();
386 let link = link.split_once('@').map_or(link, |(_, rhs)| rhs);
387 let link = link.trim_suffix("()");
388 let link = link.trim_suffix("{}");
389 let link = link.trim_suffix("[]");
390 let link = if link != "!" { link.trim_suffix('!') } else { link };
391 let link = link.trim();
392 strip_generics_from_path(link).unwrap_or_else(|_| link.into())
393}
394
395pub fn may_be_doc_link(link_type: LinkType) -> bool {
398 match link_type {
399 LinkType::Inline
400 | LinkType::Reference
401 | LinkType::ReferenceUnknown
402 | LinkType::Collapsed
403 | LinkType::CollapsedUnknown
404 | LinkType::Shortcut
405 | LinkType::ShortcutUnknown => true,
406 LinkType::Autolink | LinkType::Email => false,
407 }
408}
409
410pub(crate) fn attrs_to_preprocessed_links<A: AttributeExt + Clone>(attrs: &[A]) -> Vec<Box<str>> {
413 let (doc_fragments, other_attrs) =
414 attrs_to_doc_fragments(attrs.iter().map(|attr| (attr, None)), false);
415 let mut doc =
416 prepare_to_doc_link_resolution(&doc_fragments).into_values().next().unwrap_or_default();
417
418 for attr in other_attrs {
419 if let Some(note) = attr.deprecation_note() {
420 doc += note.as_str();
421 doc += "\n";
422 }
423 }
424
425 parse_links(&doc)
426}
427
428fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
431 let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
432 let mut event_iter = Parser::new_with_broken_link_callback(
433 doc,
434 main_body_opts(),
435 Some(&mut broken_link_callback),
436 );
437 let mut links = Vec::new();
438
439 let mut refids = UnordSet::default();
440
441 while let Some(event) = event_iter.next() {
442 match event {
443 Event::Start(Tag::Link { link_type, dest_url, title: _, id })
444 if may_be_doc_link(link_type) =>
445 {
446 if matches!(
447 link_type,
448 LinkType::Inline
449 | LinkType::ReferenceUnknown
450 | LinkType::Reference
451 | LinkType::Shortcut
452 | LinkType::ShortcutUnknown
453 ) {
454 if let Some(display_text) = collect_link_data(&mut event_iter) {
455 links.push(display_text);
456 }
457 }
458 if matches!(
459 link_type,
460 LinkType::Reference | LinkType::Shortcut | LinkType::Collapsed
461 ) {
462 refids.insert(id);
463 }
464
465 links.push(preprocess_link(&dest_url));
466 }
467 _ => {}
468 }
469 }
470
471 for (label, refdef) in event_iter.reference_definitions().iter().sorted_by_key(|x| x.0) {
472 if !refids.contains(label) {
473 links.push(preprocess_link(&refdef.dest));
474 }
475 }
476
477 links
478}
479
480fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
482 event_iter: &mut Parser<'input, F>,
483) -> Option<Box<str>> {
484 let mut display_text: Option<String> = None;
485 let mut append_text = |text: CowStr<'_>| {
486 if let Some(display_text) = &mut display_text {
487 display_text.push_str(&text);
488 } else {
489 display_text = Some(text.to_string());
490 }
491 };
492
493 while let Some(event) = event_iter.next() {
494 match event {
495 Event::Text(text) => {
496 append_text(text);
497 }
498 Event::Code(code) => {
499 append_text(code);
500 }
501 Event::End(_) => {
502 break;
503 }
504 _ => {}
505 }
506 }
507
508 display_text.map(String::into_boxed_str)
509}
510
511pub fn span_of_fragments(fragments: &[DocFragment]) -> Option<Span> {
513 let (first_fragment, last_fragment) = match fragments {
514 [] => return None,
515 [first, .., last] => (first, last),
516 [first] => (first, first),
517 };
518 if first_fragment.span == DUMMY_SP {
519 return None;
520 }
521 Some(first_fragment.span.to(last_fragment.span))
522}
523
524pub fn source_span_for_markdown_range(
546 tcx: TyCtxt<'_>,
547 markdown: &str,
548 md_range: &Range<usize>,
549 fragments: &[DocFragment],
550) -> Option<(Span, bool)> {
551 let map = tcx.sess.source_map();
552 source_span_for_markdown_range_inner(map, markdown, md_range, fragments)
553}
554
555pub fn source_span_for_markdown_range_inner(
557 map: &SourceMap,
558 markdown: &str,
559 md_range: &Range<usize>,
560 fragments: &[DocFragment],
561) -> Option<(Span, bool)> {
562 use rustc_span::BytePos;
563
564 if let &[fragment] = &fragments
565 && !fragment.kind.is_sugared()
566 && let Ok(snippet) = map.span_to_snippet(fragment.span)
567 && snippet.trim_end() == markdown.trim_end()
568 && let Ok(md_range_lo) = u32::try_from(md_range.start)
569 && let Ok(md_range_hi) = u32::try_from(md_range.end)
570 {
571 return Some((
573 Span::new(
574 fragment.span.lo() + rustc_span::BytePos(md_range_lo),
575 fragment.span.lo() + rustc_span::BytePos(md_range_hi),
576 fragment.span.ctxt(),
577 fragment.span.parent(),
578 ),
579 fragment.from_expansion,
580 ));
581 }
582
583 let is_all_sugared_doc = fragments.iter().all(|frag| frag.kind.is_sugared());
584
585 if !is_all_sugared_doc {
586 let mut match_data = None;
591 let pat = &markdown[md_range.clone()];
592 if pat.is_empty() {
594 return None;
595 }
596 for (i, fragment) in fragments.iter().enumerate() {
597 if let Ok(snippet) = map.span_to_snippet(fragment.span)
598 && let Some(match_start) = snippet.find(pat)
599 {
600 if match_data.is_none()
605 && !snippet.as_bytes()[match_start + 1..]
606 .windows(pat.len())
607 .any(|s| s == pat.as_bytes())
608 {
609 match_data = Some((i, match_start));
610 } else {
611 return None;
613 }
614 }
615 }
616 if let Some((i, match_start)) = match_data {
617 let fragment = &fragments[i];
618 let sp = fragment.span;
619 let lo = sp.lo() + BytePos(match_start as u32);
622 return Some((
623 sp.with_lo(lo).with_hi(lo + BytePos((md_range.end - md_range.start) as u32)),
624 fragment.from_expansion,
625 ));
626 }
627 return None;
628 }
629
630 let snippet = map.span_to_snippet(span_of_fragments(fragments)?).ok()?;
631
632 let starting_line = markdown[..md_range.start].matches('\n').count();
633 let ending_line = starting_line + markdown[md_range.start..md_range.end].matches('\n').count();
634
635 let mut src_lines = snippet.split_terminator('\n');
638 let md_lines = markdown.split_terminator('\n');
639
640 let mut start_bytes = 0;
643 let mut end_bytes = 0;
644
645 'outer: for (line_no, md_line) in md_lines.enumerate() {
646 loop {
647 let source_line = src_lines.next()?;
648 match source_line.find(md_line) {
649 Some(offset) => {
650 if line_no == starting_line {
651 start_bytes += offset;
652
653 if starting_line == ending_line {
654 break 'outer;
655 }
656 } else if line_no == ending_line {
657 end_bytes += offset;
658 break 'outer;
659 } else if line_no < starting_line {
660 start_bytes += source_line.len() - md_line.len();
661 } else {
662 end_bytes += source_line.len() - md_line.len();
663 }
664 break;
665 }
666 None => {
667 if line_no <= starting_line {
670 start_bytes += source_line.len() + 1;
671 } else {
672 end_bytes += source_line.len() + 1;
673 }
674 }
675 }
676 }
677 }
678
679 let span = span_of_fragments(fragments)?;
680 let src_span = span.from_inner(InnerSpan::new(
681 md_range.start + start_bytes,
682 md_range.end + start_bytes + end_bytes,
683 ));
684 Some((
685 src_span,
686 fragments.iter().any(|frag| frag.span.overlaps(src_span) && frag.from_expansion),
687 ))
688}