1use std::mem;
2use std::ops::Range;
3
4use itertools::Itertools;
5use pulldown_cmark::{
6 BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag,
7};
8use rustc_ast as ast;
9use rustc_ast::attr::AttributeExt;
10use rustc_ast::util::comments::beautify_doc_string;
11use rustc_data_structures::fx::FxIndexMap;
12use rustc_data_structures::unord::UnordSet;
13use rustc_middle::ty::TyCtxt;
14use rustc_span::def_id::DefId;
15use rustc_span::source_map::SourceMap;
16use rustc_span::{DUMMY_SP, InnerSpan, Span, Symbol, sym};
17use thin_vec::ThinVec;
18use tracing::{debug, trace};
19
20#[cfg(test)]
21mod tests;
22
/// Distinguishes how a documentation fragment was written in the source.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum DocFragmentKind {
    /// Assigned by `attrs_to_doc_fragments` when the attribute reports itself as a doc
    /// comment (`attr.is_doc_comment()`).
    SugaredDoc,
    /// Assigned by `attrs_to_doc_fragments` to every other attribute that carries a doc
    /// string.
    RawDoc,
}
30
/// One piece of documentation text extracted from a single attribute.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct DocFragment {
    /// Source span of the fragment: the whole attribute for sugared docs, the attribute's
    /// value span (when available) for raw docs.
    pub span: Span,
    /// Identifier that was paired with the attribute by the caller of
    /// `attrs_to_doc_fragments`; used as the grouping key in
    /// `prepare_to_doc_link_resolution`.
    pub item_id: Option<DefId>,
    /// The documentation text, after `beautify_doc_string` has been applied.
    pub doc: Symbol,
    /// Whether the fragment came from a doc comment or from another doc-carrying attribute.
    pub kind: DocFragmentKind,
    /// Number of leading bytes that `add_doc_fragment` removes from every non-blank line.
    /// Starts at 0 and is filled in by `unindent_doc_fragments`.
    pub indent: usize,
}
53
/// Reasons why stripping generics from a path (see `strip_generics_from_path`) can fail.
#[derive(Clone, Copy, Debug)]
pub enum MalformedGenerics {
    /// The numbers of `<` and `>` in a path segment do not match.
    UnbalancedAngleBrackets,
    /// Nothing was left of the path once the generics were removed.
    MissingType,
    /// The text between angle brackets contained ` as `.
    HasFullyQualifiedSyntax,
    /// A `:` was found that is not immediately followed by a second `:`.
    InvalidPathSeparator,
    /// A `<` was immediately followed by another `<`.
    TooManyAngleBrackets,
    /// A `<` was immediately followed by `>`.
    EmptyAngleBrackets,
}
94
95pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
109 let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
122 && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
123 {
124 1
127 } else {
128 0
129 };
130
131 let Some(min_indent) = docs
141 .iter()
142 .map(|fragment| {
143 fragment
144 .doc
145 .as_str()
146 .lines()
147 .filter(|line| line.chars().any(|c| !c.is_whitespace()))
148 .map(|line| {
149 let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
152 whitespace
153 + (if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add })
154 })
155 .min()
156 .unwrap_or(usize::MAX)
157 })
158 .min()
159 else {
160 return;
161 };
162
163 for fragment in docs {
164 if fragment.doc == sym::empty {
165 continue;
166 }
167
168 let indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
169 min_indent - add
170 } else {
171 min_indent
172 };
173
174 fragment.indent = indent;
175 }
176}
177
178pub fn add_doc_fragment(out: &mut String, frag: &DocFragment) {
184 if frag.doc == sym::empty {
185 out.push('\n');
186 return;
187 }
188 let s = frag.doc.as_str();
189 let mut iter = s.lines();
190
191 while let Some(line) = iter.next() {
192 if line.chars().any(|c| !c.is_whitespace()) {
193 assert!(line.len() >= frag.indent);
194 out.push_str(&line[frag.indent..]);
195 } else {
196 out.push_str(line);
197 }
198 out.push('\n');
199 }
200}
201
202pub fn attrs_to_doc_fragments<'a, A: AttributeExt + Clone + 'a>(
203 attrs: impl Iterator<Item = (&'a A, Option<DefId>)>,
204 doc_only: bool,
205) -> (Vec<DocFragment>, ThinVec<A>) {
206 let mut doc_fragments = Vec::new();
207 let mut other_attrs = ThinVec::<A>::new();
208 for (attr, item_id) in attrs {
209 if let Some((doc_str, comment_kind)) = attr.doc_str_and_comment_kind() {
210 let doc = beautify_doc_string(doc_str, comment_kind);
211 let (span, kind) = if attr.is_doc_comment() {
212 (attr.span(), DocFragmentKind::SugaredDoc)
213 } else {
214 (
215 attr.value_span()
216 .map(|i| i.with_ctxt(attr.span().ctxt()))
217 .unwrap_or(attr.span()),
218 DocFragmentKind::RawDoc,
219 )
220 };
221 let fragment = DocFragment { span, doc, kind, item_id, indent: 0 };
222 doc_fragments.push(fragment);
223 } else if !doc_only {
224 other_attrs.push(attr.clone());
225 }
226 }
227
228 unindent_doc_fragments(&mut doc_fragments);
229
230 (doc_fragments, other_attrs)
231}
232
233pub fn prepare_to_doc_link_resolution(
239 doc_fragments: &[DocFragment],
240) -> FxIndexMap<Option<DefId>, String> {
241 let mut res = FxIndexMap::default();
242 for fragment in doc_fragments {
243 let out_str = res.entry(fragment.item_id).or_default();
244 add_doc_fragment(out_str, fragment);
245 }
246 res
247}
248
249pub fn main_body_opts() -> Options {
251 Options::ENABLE_TABLES
252 | Options::ENABLE_FOOTNOTES
253 | Options::ENABLE_STRIKETHROUGH
254 | Options::ENABLE_TASKLISTS
255 | Options::ENABLE_SMART_PUNCTUATION
256}
257
258fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<String, MalformedGenerics> {
259 let mut stripped_segment = String::new();
260 let mut param_depth = 0;
261
262 let mut latest_generics_chunk = String::new();
263
264 for c in segment {
265 if c == '<' {
266 param_depth += 1;
267 latest_generics_chunk.clear();
268 } else if c == '>' {
269 param_depth -= 1;
270 if latest_generics_chunk.contains(" as ") {
271 return Err(MalformedGenerics::HasFullyQualifiedSyntax);
274 }
275 } else if param_depth == 0 {
276 stripped_segment.push(c);
277 } else {
278 latest_generics_chunk.push(c);
279 }
280 }
281
282 if param_depth == 0 {
283 Ok(stripped_segment)
284 } else {
285 Err(MalformedGenerics::UnbalancedAngleBrackets)
287 }
288}
289
290pub fn strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGenerics> {
291 if !path_str.contains(['<', '>']) {
292 return Ok(path_str.into());
293 }
294 let mut stripped_segments = vec![];
295 let mut path = path_str.chars().peekable();
296 let mut segment = Vec::new();
297
298 while let Some(chr) = path.next() {
299 match chr {
300 ':' => {
301 if path.next_if_eq(&':').is_some() {
302 let stripped_segment =
303 strip_generics_from_path_segment(mem::take(&mut segment))?;
304 if !stripped_segment.is_empty() {
305 stripped_segments.push(stripped_segment);
306 }
307 } else {
308 return Err(MalformedGenerics::InvalidPathSeparator);
309 }
310 }
311 '<' => {
312 segment.push(chr);
313
314 match path.next() {
315 Some('<') => {
316 return Err(MalformedGenerics::TooManyAngleBrackets);
317 }
318 Some('>') => {
319 return Err(MalformedGenerics::EmptyAngleBrackets);
320 }
321 Some(chr) => {
322 segment.push(chr);
323
324 while let Some(chr) = path.next_if(|c| *c != '>') {
325 segment.push(chr);
326 }
327 }
328 None => break,
329 }
330 }
331 _ => segment.push(chr),
332 }
333 trace!("raw segment: {:?}", segment);
334 }
335
336 if !segment.is_empty() {
337 let stripped_segment = strip_generics_from_path_segment(segment)?;
338 if !stripped_segment.is_empty() {
339 stripped_segments.push(stripped_segment);
340 }
341 }
342
343 debug!("path_str: {path_str:?}\nstripped segments: {stripped_segments:?}");
344
345 let stripped_path = stripped_segments.join("::");
346
347 if !stripped_path.is_empty() {
348 Ok(stripped_path.into())
349 } else {
350 Err(MalformedGenerics::MissingType)
351 }
352}
353
354pub fn inner_docs(attrs: &[impl AttributeExt]) -> bool {
359 for attr in attrs {
360 if let Some(attr_style) = attr.doc_resolution_scope() {
361 return attr_style == ast::AttrStyle::Inner;
362 }
363 }
364 true
365}
366
367pub fn has_primitive_or_keyword_docs(attrs: &[impl AttributeExt]) -> bool {
369 for attr in attrs {
370 if attr.has_name(sym::rustc_doc_primitive) {
371 return true;
372 } else if attr.has_name(sym::doc)
373 && let Some(items) = attr.meta_item_list()
374 {
375 for item in items {
376 if item.has_name(sym::keyword) {
377 return true;
378 }
379 }
380 }
381 }
382 false
383}
384
385fn preprocess_link(link: &str) -> Box<str> {
389 let link = link.replace('`', "");
390 let link = link.split('#').next().unwrap();
391 let link = link.trim();
392 let link = link.rsplit('@').next().unwrap();
393 let link = link.strip_suffix("()").unwrap_or(link);
394 let link = link.strip_suffix("{}").unwrap_or(link);
395 let link = link.strip_suffix("[]").unwrap_or(link);
396 let link = if link != "!" { link.strip_suffix('!').unwrap_or(link) } else { link };
397 let link = link.trim();
398 strip_generics_from_path(link).unwrap_or_else(|_| link.into())
399}
400
401pub fn may_be_doc_link(link_type: LinkType) -> bool {
404 match link_type {
405 LinkType::Inline
406 | LinkType::Reference
407 | LinkType::ReferenceUnknown
408 | LinkType::Collapsed
409 | LinkType::CollapsedUnknown
410 | LinkType::Shortcut
411 | LinkType::ShortcutUnknown => true,
412 LinkType::Autolink | LinkType::Email => false,
413 }
414}
415
416pub(crate) fn attrs_to_preprocessed_links<A: AttributeExt + Clone>(attrs: &[A]) -> Vec<Box<str>> {
419 let (doc_fragments, _) = attrs_to_doc_fragments(attrs.iter().map(|attr| (attr, None)), true);
420 let doc = prepare_to_doc_link_resolution(&doc_fragments).into_values().next().unwrap();
421
422 parse_links(&doc)
423}
424
/// Collects the candidate link strings found in the markdown text `doc`.
///
/// The returned list contains, in order of discovery:
/// * for inline, reference and shortcut style links, the link's display text (when
///   `collect_link_data` finds any),
/// * for every link accepted by `may_be_doc_link`, its destination run through
///   `preprocess_link`,
/// * finally, the preprocessed destination of every reference definition whose label was
///   not recorded for a `Reference`, `Shortcut` or `Collapsed` link, ordered by label.
fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
    // For a reference that has no definition, report the reference text itself as the
    // link, paired with an empty string.
    let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
    let mut event_iter = Parser::new_with_broken_link_callback(
        doc,
        main_body_opts(),
        Some(&mut broken_link_callback),
    );
    let mut links = Vec::new();

    // Ids of the reference-style links that were actually seen in the text.
    let mut refids = UnordSet::default();

    // A `while let` (rather than `for`) is required: the parser is advanced inside the loop
    // body by `collect_link_data` and is queried again after the loop.
    while let Some(event) = event_iter.next() {
        match event {
            Event::Start(Tag::Link { link_type, dest_url, title: _, id })
                if may_be_doc_link(link_type) =>
            {
                // `Collapsed` and `CollapsedUnknown` are the only accepted link types whose
                // display text is not collected.
                if matches!(
                    link_type,
                    LinkType::Inline
                        | LinkType::ReferenceUnknown
                        | LinkType::Reference
                        | LinkType::Shortcut
                        | LinkType::ShortcutUnknown
                ) {
                    if let Some(display_text) = collect_link_data(&mut event_iter) {
                        links.push(display_text);
                    }
                }
                if matches!(
                    link_type,
                    LinkType::Reference | LinkType::Shortcut | LinkType::Collapsed
                ) {
                    refids.insert(id);
                }

                links.push(preprocess_link(&dest_url));
            }
            _ => {}
        }
    }

    // Add the reference definitions that no link in the text referred to. Sorting by label
    // makes the output order independent of the definitions' iteration order.
    for (label, refdef) in event_iter.reference_definitions().iter().sorted_by_key(|x| x.0) {
        if !refids.contains(label) {
            links.push(preprocess_link(&refdef.dest));
        }
    }

    links
}
476
477fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
479 event_iter: &mut Parser<'input, F>,
480) -> Option<Box<str>> {
481 let mut display_text: Option<String> = None;
482 let mut append_text = |text: CowStr<'_>| {
483 if let Some(display_text) = &mut display_text {
484 display_text.push_str(&text);
485 } else {
486 display_text = Some(text.to_string());
487 }
488 };
489
490 while let Some(event) = event_iter.next() {
491 match event {
492 Event::Text(text) => {
493 append_text(text);
494 }
495 Event::Code(code) => {
496 append_text(code);
497 }
498 Event::End(_) => {
499 break;
500 }
501 _ => {}
502 }
503 }
504
505 display_text.map(String::into_boxed_str)
506}
507
508pub fn span_of_fragments(fragments: &[DocFragment]) -> Option<Span> {
510 if fragments.is_empty() {
511 return None;
512 }
513 let start = fragments[0].span;
514 if start == DUMMY_SP {
515 return None;
516 }
517 let end = fragments.last().expect("no doc strings provided").span;
518 Some(start.to(end))
519}
520
521pub fn source_span_for_markdown_range(
539 tcx: TyCtxt<'_>,
540 markdown: &str,
541 md_range: &Range<usize>,
542 fragments: &[DocFragment],
543) -> Option<Span> {
544 let map = tcx.sess.source_map();
545 source_span_for_markdown_range_inner(map, markdown, md_range, fragments)
546}
547
/// Tries to map the byte range `md_range` of `markdown` back to a span in the source text
/// that `fragments` were taken from.
///
/// Three strategies are used, depending on the fragments:
/// 1. A single `RawDoc` fragment whose source snippet equals `markdown` (ignoring trailing
///    whitespace): markdown offsets are applied directly to the fragment's span.
/// 2. Any other set of fragments that are not all `SugaredDoc`: the text
///    `markdown[md_range]` is searched for in the fragments' snippets and must occur
///    exactly once.
/// 3. Only `SugaredDoc` fragments: source and markdown are walked line by line to count the
///    source bytes that are not part of the markdown.
///
/// Returns `None` whenever the mapping cannot be established (snippet unavailable, empty or
/// ambiguous search text, source lines exhausted, no usable overall span).
pub fn source_span_for_markdown_range_inner(
    map: &SourceMap,
    markdown: &str,
    md_range: &Range<usize>,
    fragments: &[DocFragment],
) -> Option<Span> {
    use rustc_span::BytePos;

    // Strategy 1: exactly one raw fragment whose source text is the markdown itself.
    if let &[fragment] = &fragments
        && fragment.kind == DocFragmentKind::RawDoc
        && let Ok(snippet) = map.span_to_snippet(fragment.span)
        && snippet.trim_end() == markdown.trim_end()
        && let Ok(md_range_lo) = u32::try_from(md_range.start)
        && let Ok(md_range_hi) = u32::try_from(md_range.end)
    {
        // Source and markdown contain the same bytes, so the offsets carry over unchanged.
        return Some(Span::new(
            fragment.span.lo() + rustc_span::BytePos(md_range_lo),
            fragment.span.lo() + rustc_span::BytePos(md_range_hi),
            fragment.span.ctxt(),
            fragment.span.parent(),
        ));
    }

    let is_all_sugared_doc = fragments.iter().all(|frag| frag.kind == DocFragmentKind::SugaredDoc);

    // Strategy 2: look for the target text itself inside the individual fragments.
    if !is_all_sugared_doc {
        // `(fragment index, byte offset of the match inside that fragment's snippet)`.
        let mut match_data = None;
        let pat = &markdown[md_range.clone()];
        // An empty pattern would match everywhere, so the search is meaningless.
        if pat.is_empty() {
            return None;
        }
        for (i, fragment) in fragments.iter().enumerate() {
            if let Ok(snippet) = map.span_to_snippet(fragment.span)
                && let Some(match_start) = snippet.find(pat)
            {
                // Accept the match only if no earlier fragment matched and the pattern does
                // not occur a second time (possibly overlapping) later in this snippet.
                if match_data.is_none()
                    && !snippet.as_bytes()[match_start + 1..]
                        .windows(pat.len())
                        .any(|s| s == pat.as_bytes())
                {
                    match_data = Some((i, match_start));
                } else {
                    // More than one candidate location: give up rather than guess.
                    return None;
                }
            }
        }
        if let Some((i, match_start)) = match_data {
            let sp = fragments[i].span;
            // Compute the start first; the end is the start plus the length of the range.
            let lo = sp.lo() + BytePos(match_start as u32);
            return Some(
                sp.with_lo(lo).with_hi(lo + BytePos((md_range.end - md_range.start) as u32)),
            );
        }
        return None;
    }

    // Strategy 3: all fragments are sugared doc comments.
    let snippet = map.span_to_snippet(span_of_fragments(fragments)?).ok()?;

    // Zero-based markdown line numbers on which the range starts and ends.
    let starting_line = markdown[..md_range.start].matches('\n').count();
    let ending_line = starting_line + markdown[md_range.start..md_range.end].matches('\n').count();

    // Both texts are split on `'\n'` only, so a `'\r'` before the newline stays part of the
    // line and is included in the byte counts below.
    let mut src_lines = snippet.split_terminator('\n');
    let md_lines = markdown.split_terminator('\n');

    // Number of source bytes that are not part of the markdown: `start_bytes` counts those
    // before the start of the range, `end_bytes` those between its start and its end.
    let mut start_bytes = 0;
    let mut end_bytes = 0;

    'outer: for (line_no, md_line) in md_lines.enumerate() {
        // Advance through the source until a line containing the current markdown line is
        // found; running out of source lines aborts with `None`.
        loop {
            let source_line = src_lines.next()?;
            match source_line.find(md_line) {
                Some(offset) => {
                    if line_no == starting_line {
                        start_bytes += offset;

                        if starting_line == ending_line {
                            break 'outer;
                        }
                    } else if line_no == ending_line {
                        end_bytes += offset;
                        break 'outer;
                    } else if line_no < starting_line {
                        // Whole line before the range: everything in the source line that
                        // is not the markdown line is extra.
                        start_bytes += source_line.len() - md_line.len();
                    } else {
                        end_bytes += source_line.len() - md_line.len();
                    }
                    break;
                }
                None => {
                    // This source line has no markdown counterpart; count it entirely,
                    // plus the newline that was removed by the split.
                    if line_no <= starting_line {
                        start_bytes += source_line.len() + 1;
                    } else {
                        end_bytes += source_line.len() + 1;
                    }
                }
            }
        }
    }

    // Shift the markdown offsets by the extra source bytes and apply them to the span that
    // covers all fragments.
    Some(span_of_fragments(fragments)?.from_inner(InnerSpan::new(
        md_range.start + start_bytes,
        md_range.end + start_bytes + end_bytes,
    )))
}