1use std::mem;
2use std::ops::Range;
3
4use itertools::Itertools;
5pub use pulldown_cmark;
7use pulldown_cmark::{
8 BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag,
9};
10use rustc_ast as ast;
11use rustc_ast::attr::AttributeExt;
12use rustc_ast::join_path_syms;
13use rustc_ast::token::DocFragmentKind;
14use rustc_ast::util::comments::beautify_doc_string;
15use rustc_data_structures::fx::FxIndexMap;
16use rustc_data_structures::unord::UnordSet;
17use rustc_middle::ty::TyCtxt;
18use rustc_span::def_id::DefId;
19use rustc_span::source_map::SourceMap;
20use rustc_span::{DUMMY_SP, InnerSpan, Span, Symbol, sym};
21use thin_vec::ThinVec;
22use tracing::{debug, trace};
23
24#[cfg(test)]
25mod tests;
26
27#[derive(Clone, PartialEq, Eq, Debug)]
36pub struct DocFragment {
37 pub span: Span,
38 pub item_id: Option<DefId>,
45 pub doc: Symbol,
46 pub kind: DocFragmentKind,
47 pub indent: usize,
48 pub from_expansion: bool,
51}
52
/// The reasons [`strip_generics_from_path`] can reject a path.
#[derive(Debug, Clone, Copy)]
pub enum MalformedGenerics {
    /// The `<` and `>` in a path segment do not pair up.
    UnbalancedAngleBrackets,
    /// Nothing but generics was found: no non-empty path segment is left after stripping.
    MissingType,
    /// A generics list contains ` as `, i.e. the path uses fully-qualified syntax.
    HasFullyQualifiedSyntax,
    /// A single `:` was found where the `::` separator was expected.
    InvalidPathSeparator,
    /// A `<` is immediately followed by another `<`.
    TooManyAngleBrackets,
    /// A `<` is immediately followed by `>`.
    EmptyAngleBrackets,
}
93
94pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
108 let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
121 && docs.iter().any(|d| d.kind.is_sugared())
122 {
123 1
126 } else {
127 0
128 };
129
130 let Some(min_indent) = docs
140 .iter()
141 .map(|fragment| {
142 fragment
143 .doc
144 .as_str()
145 .lines()
146 .filter(|line| line.chars().any(|c| !c.is_whitespace()))
147 .map(|line| {
148 let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
151 whitespace + (if fragment.kind.is_sugared() { 0 } else { add })
152 })
153 .min()
154 .unwrap_or(usize::MAX)
155 })
156 .min()
157 else {
158 return;
159 };
160
161 for fragment in docs {
162 if fragment.doc == sym::empty {
163 continue;
164 }
165
166 let indent = if !fragment.kind.is_sugared() && min_indent > 0 {
167 min_indent - add
168 } else {
169 min_indent
170 };
171
172 fragment.indent = indent;
173 }
174}
175
176pub fn add_doc_fragment(out: &mut String, frag: &DocFragment) {
182 if frag.doc == sym::empty {
183 out.push('\n');
184 return;
185 }
186 let s = frag.doc.as_str();
187 let mut iter = s.lines();
188
189 while let Some(line) = iter.next() {
190 if line.chars().any(|c| !c.is_whitespace()) {
191 assert!(line.len() >= frag.indent);
192 out.push_str(&line[frag.indent..]);
193 } else {
194 out.push_str(line);
195 }
196 out.push('\n');
197 }
198}
199
200pub fn attrs_to_doc_fragments<'a, A: AttributeExt + Clone + 'a>(
201 attrs: impl Iterator<Item = (&'a A, Option<DefId>)>,
202 doc_only: bool,
203) -> (Vec<DocFragment>, ThinVec<A>) {
204 let (min_size, max_size) = attrs.size_hint();
205 let size_hint = max_size.unwrap_or(min_size);
206 let mut doc_fragments = Vec::with_capacity(size_hint);
207 let mut other_attrs = ThinVec::<A>::with_capacity(if doc_only { 0 } else { size_hint });
208 for (attr, item_id) in attrs {
209 if let Some((doc_str, fragment_kind)) = attr.doc_str_and_fragment_kind() {
210 let doc = beautify_doc_string(doc_str, fragment_kind.comment_kind());
211 let attr_span = attr.span();
212 let (span, from_expansion) = match fragment_kind {
213 DocFragmentKind::Sugared(_) => (attr_span, attr_span.from_expansion()),
214 DocFragmentKind::Raw(value_span) => {
215 (value_span.with_ctxt(attr_span.ctxt()), value_span.from_expansion())
216 }
217 };
218 let fragment =
219 DocFragment { span, doc, kind: fragment_kind, item_id, indent: 0, from_expansion };
220 doc_fragments.push(fragment);
221 } else if !doc_only {
222 other_attrs.push(attr.clone());
223 }
224 }
225
226 doc_fragments.shrink_to_fit();
227 other_attrs.shrink_to_fit();
228
229 unindent_doc_fragments(&mut doc_fragments);
230
231 (doc_fragments, other_attrs)
232}
233
234pub fn prepare_to_doc_link_resolution(
240 doc_fragments: &[DocFragment],
241) -> FxIndexMap<Option<DefId>, String> {
242 let mut res = FxIndexMap::default();
243 for fragment in doc_fragments {
244 let out_str = res.entry(fragment.item_id).or_default();
245 add_doc_fragment(out_str, fragment);
246 }
247 res
248}
249
250pub fn main_body_opts() -> Options {
252 Options::ENABLE_TABLES
253 | Options::ENABLE_FOOTNOTES
254 | Options::ENABLE_STRIKETHROUGH
255 | Options::ENABLE_TASKLISTS
256 | Options::ENABLE_SMART_PUNCTUATION
257}
258
259fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<Symbol, MalformedGenerics> {
260 let mut stripped_segment = String::new();
261 let mut param_depth = 0;
262
263 let mut latest_generics_chunk = String::new();
264
265 for c in segment {
266 if c == '<' {
267 param_depth += 1;
268 latest_generics_chunk.clear();
269 } else if c == '>' {
270 param_depth -= 1;
271 if latest_generics_chunk.contains(" as ") {
272 return Err(MalformedGenerics::HasFullyQualifiedSyntax);
275 }
276 } else if param_depth == 0 {
277 stripped_segment.push(c);
278 } else {
279 latest_generics_chunk.push(c);
280 }
281 }
282
283 if param_depth == 0 {
284 Ok(Symbol::intern(&stripped_segment))
285 } else {
286 Err(MalformedGenerics::UnbalancedAngleBrackets)
288 }
289}
290
291pub fn strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGenerics> {
292 if !path_str.contains(['<', '>']) {
293 return Ok(path_str.into());
294 }
295 let mut stripped_segments = vec![];
296 let mut path = path_str.chars().peekable();
297 let mut segment = Vec::new();
298
299 while let Some(chr) = path.next() {
300 match chr {
301 ':' => {
302 if path.next_if_eq(&':').is_some() {
303 let stripped_segment =
304 strip_generics_from_path_segment(mem::take(&mut segment))?;
305 if !stripped_segment.is_empty() {
306 stripped_segments.push(stripped_segment);
307 }
308 } else {
309 return Err(MalformedGenerics::InvalidPathSeparator);
310 }
311 }
312 '<' => {
313 segment.push(chr);
314
315 match path.next() {
316 Some('<') => {
317 return Err(MalformedGenerics::TooManyAngleBrackets);
318 }
319 Some('>') => {
320 return Err(MalformedGenerics::EmptyAngleBrackets);
321 }
322 Some(chr) => {
323 segment.push(chr);
324
325 while let Some(chr) = path.next_if(|c| *c != '>') {
326 segment.push(chr);
327 }
328 }
329 None => break,
330 }
331 }
332 _ => segment.push(chr),
333 }
334 trace!("raw segment: {:?}", segment);
335 }
336
337 if !segment.is_empty() {
338 let stripped_segment = strip_generics_from_path_segment(segment)?;
339 if !stripped_segment.is_empty() {
340 stripped_segments.push(stripped_segment);
341 }
342 }
343
344 debug!("path_str: {path_str:?}\nstripped segments: {stripped_segments:?}");
345
346 if !stripped_segments.is_empty() {
347 let stripped_path = join_path_syms(stripped_segments);
348 Ok(stripped_path.into())
349 } else {
350 Err(MalformedGenerics::MissingType)
351 }
352}
353
354pub fn inner_docs(attrs: &[impl AttributeExt]) -> bool {
359 for attr in attrs {
360 if let Some(attr_style) = attr.doc_resolution_scope() {
361 return attr_style == ast::AttrStyle::Inner;
362 }
363 }
364 true
365}
366
367pub fn has_primitive_or_keyword_or_attribute_docs(attrs: &[impl AttributeExt]) -> bool {
369 for attr in attrs {
370 if attr.has_name(sym::rustc_doc_primitive) || attr.is_doc_keyword_or_attribute() {
371 return true;
372 }
373 }
374 false
375}
376
377fn preprocess_link(link: &str) -> Box<str> {
379 let link = link.replace('`', "");
384 let link = link.split('#').next().unwrap();
385 let link = link.trim();
386 let link = link.split_once('@').map_or(link, |(_, rhs)| rhs);
387 let link = link.trim_suffix("()");
388 let link = link.trim_suffix("{}");
389 let link = link.trim_suffix("[]");
390 let link = if link != "!" { link.trim_suffix('!') } else { link };
391 let link = link.trim();
392 strip_generics_from_path(link).unwrap_or_else(|_| link.into())
393}
394
395pub fn may_be_doc_link(link_type: LinkType) -> bool {
398 match link_type {
399 LinkType::Inline
400 | LinkType::Reference
401 | LinkType::ReferenceUnknown
402 | LinkType::Collapsed
403 | LinkType::CollapsedUnknown
404 | LinkType::Shortcut
405 | LinkType::ShortcutUnknown => true,
406 LinkType::Autolink | LinkType::Email => false,
407 }
408}
409
410pub(crate) fn attrs_to_preprocessed_links<A: AttributeExt + Clone>(attrs: &[A]) -> Vec<Box<str>> {
413 let (doc_fragments, _) = attrs_to_doc_fragments(attrs.iter().map(|attr| (attr, None)), true);
414 let doc = prepare_to_doc_link_resolution(&doc_fragments).into_values().next().unwrap();
415
416 parse_links(&doc)
417}
418
419fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
422 let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
423 let mut event_iter = Parser::new_with_broken_link_callback(
424 doc,
425 main_body_opts(),
426 Some(&mut broken_link_callback),
427 );
428 let mut links = Vec::new();
429
430 let mut refids = UnordSet::default();
431
432 while let Some(event) = event_iter.next() {
433 match event {
434 Event::Start(Tag::Link { link_type, dest_url, title: _, id })
435 if may_be_doc_link(link_type) =>
436 {
437 if matches!(
438 link_type,
439 LinkType::Inline
440 | LinkType::ReferenceUnknown
441 | LinkType::Reference
442 | LinkType::Shortcut
443 | LinkType::ShortcutUnknown
444 ) {
445 if let Some(display_text) = collect_link_data(&mut event_iter) {
446 links.push(display_text);
447 }
448 }
449 if matches!(
450 link_type,
451 LinkType::Reference | LinkType::Shortcut | LinkType::Collapsed
452 ) {
453 refids.insert(id);
454 }
455
456 links.push(preprocess_link(&dest_url));
457 }
458 _ => {}
459 }
460 }
461
462 for (label, refdef) in event_iter.reference_definitions().iter().sorted_by_key(|x| x.0) {
463 if !refids.contains(label) {
464 links.push(preprocess_link(&refdef.dest));
465 }
466 }
467
468 links
469}
470
471fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
473 event_iter: &mut Parser<'input, F>,
474) -> Option<Box<str>> {
475 let mut display_text: Option<String> = None;
476 let mut append_text = |text: CowStr<'_>| {
477 if let Some(display_text) = &mut display_text {
478 display_text.push_str(&text);
479 } else {
480 display_text = Some(text.to_string());
481 }
482 };
483
484 while let Some(event) = event_iter.next() {
485 match event {
486 Event::Text(text) => {
487 append_text(text);
488 }
489 Event::Code(code) => {
490 append_text(code);
491 }
492 Event::End(_) => {
493 break;
494 }
495 _ => {}
496 }
497 }
498
499 display_text.map(String::into_boxed_str)
500}
501
502pub fn span_of_fragments(fragments: &[DocFragment]) -> Option<Span> {
504 let (first_fragment, last_fragment) = match fragments {
505 [] => return None,
506 [first, .., last] => (first, last),
507 [first] => (first, first),
508 };
509 if first_fragment.span == DUMMY_SP {
510 return None;
511 }
512 Some(first_fragment.span.to(last_fragment.span))
513}
514
515pub fn source_span_for_markdown_range(
537 tcx: TyCtxt<'_>,
538 markdown: &str,
539 md_range: &Range<usize>,
540 fragments: &[DocFragment],
541) -> Option<(Span, bool)> {
542 let map = tcx.sess.source_map();
543 source_span_for_markdown_range_inner(map, markdown, md_range, fragments)
544}
545
546pub fn source_span_for_markdown_range_inner(
548 map: &SourceMap,
549 markdown: &str,
550 md_range: &Range<usize>,
551 fragments: &[DocFragment],
552) -> Option<(Span, bool)> {
553 use rustc_span::BytePos;
554
555 if let &[fragment] = &fragments
556 && !fragment.kind.is_sugared()
557 && let Ok(snippet) = map.span_to_snippet(fragment.span)
558 && snippet.trim_end() == markdown.trim_end()
559 && let Ok(md_range_lo) = u32::try_from(md_range.start)
560 && let Ok(md_range_hi) = u32::try_from(md_range.end)
561 {
562 return Some((
564 Span::new(
565 fragment.span.lo() + rustc_span::BytePos(md_range_lo),
566 fragment.span.lo() + rustc_span::BytePos(md_range_hi),
567 fragment.span.ctxt(),
568 fragment.span.parent(),
569 ),
570 fragment.from_expansion,
571 ));
572 }
573
574 let is_all_sugared_doc = fragments.iter().all(|frag| frag.kind.is_sugared());
575
576 if !is_all_sugared_doc {
577 let mut match_data = None;
582 let pat = &markdown[md_range.clone()];
583 if pat.is_empty() {
585 return None;
586 }
587 for (i, fragment) in fragments.iter().enumerate() {
588 if let Ok(snippet) = map.span_to_snippet(fragment.span)
589 && let Some(match_start) = snippet.find(pat)
590 {
591 if match_data.is_none()
596 && !snippet.as_bytes()[match_start + 1..]
597 .windows(pat.len())
598 .any(|s| s == pat.as_bytes())
599 {
600 match_data = Some((i, match_start));
601 } else {
602 return None;
604 }
605 }
606 }
607 if let Some((i, match_start)) = match_data {
608 let fragment = &fragments[i];
609 let sp = fragment.span;
610 let lo = sp.lo() + BytePos(match_start as u32);
613 return Some((
614 sp.with_lo(lo).with_hi(lo + BytePos((md_range.end - md_range.start) as u32)),
615 fragment.from_expansion,
616 ));
617 }
618 return None;
619 }
620
621 let snippet = map.span_to_snippet(span_of_fragments(fragments)?).ok()?;
622
623 let starting_line = markdown[..md_range.start].matches('\n').count();
624 let ending_line = starting_line + markdown[md_range.start..md_range.end].matches('\n').count();
625
626 let mut src_lines = snippet.split_terminator('\n');
629 let md_lines = markdown.split_terminator('\n');
630
631 let mut start_bytes = 0;
634 let mut end_bytes = 0;
635
636 'outer: for (line_no, md_line) in md_lines.enumerate() {
637 loop {
638 let source_line = src_lines.next()?;
639 match source_line.find(md_line) {
640 Some(offset) => {
641 if line_no == starting_line {
642 start_bytes += offset;
643
644 if starting_line == ending_line {
645 break 'outer;
646 }
647 } else if line_no == ending_line {
648 end_bytes += offset;
649 break 'outer;
650 } else if line_no < starting_line {
651 start_bytes += source_line.len() - md_line.len();
652 } else {
653 end_bytes += source_line.len() - md_line.len();
654 }
655 break;
656 }
657 None => {
658 if line_no <= starting_line {
661 start_bytes += source_line.len() + 1;
662 } else {
663 end_bytes += source_line.len() + 1;
664 }
665 }
666 }
667 }
668 }
669
670 let span = span_of_fragments(fragments)?;
671 let src_span = span.from_inner(InnerSpan::new(
672 md_range.start + start_bytes,
673 md_range.end + start_bytes + end_bytes,
674 ));
675 Some((
676 src_span,
677 fragments.iter().any(|frag| frag.span.overlaps(src_span) && frag.from_expansion),
678 ))
679}