1use std::mem;
2use std::ops::Range;
3
4use pulldown_cmark::{
5 BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag,
6};
7use rustc_ast as ast;
8use rustc_ast::attr::AttributeExt;
9use rustc_ast::util::comments::beautify_doc_string;
10use rustc_data_structures::fx::FxIndexMap;
11use rustc_middle::ty::TyCtxt;
12use rustc_span::def_id::DefId;
13use rustc_span::{DUMMY_SP, InnerSpan, Span, Symbol, kw, sym};
14use thin_vec::ThinVec;
15use tracing::{debug, trace};
16
/// Where a [`DocFragment`] came from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DocFragmentKind {
    /// The attribute reported itself as a doc comment (`is_doc_comment()`),
    /// i.e. the sugared comment form.
    SugaredDoc,
    /// Any other attribute that carries a doc string, i.e. the explicit
    /// attribute form.
    RawDoc,
}
24
25#[derive(Clone, PartialEq, Eq, Debug)]
34pub struct DocFragment {
35 pub span: Span,
36 pub item_id: Option<DefId>,
43 pub doc: Symbol,
44 pub kind: DocFragmentKind,
45 pub indent: usize,
46}
47
/// Reasons why generics could not be stripped from a path string.
#[derive(Debug, Clone, Copy)]
pub enum MalformedGenerics {
    /// The number of `<` and `>` in a path segment does not match up,
    /// e.g. `Vec<T` or `Vec<T>>`.
    UnbalancedAngleBrackets,
    /// Nothing is left of the path once the generics are removed,
    /// e.g. `<T>`.
    MissingType,
    /// A generics list contains ` as `, i.e. the path uses fully-qualified
    /// syntax such as `<Vec as IntoIterator>::into_iter`.
    HasFullyQualifiedSyntax,
    /// A single `:` was found where a `::` path separator was expected.
    InvalidPathSeparator,
    /// Two `<` directly follow each other, e.g. `Vec<<T>>`.
    TooManyAngleBrackets,
    /// A `<` is directly followed by `>`, e.g. `Vec<>`.
    EmptyAngleBrackets,
}
88
89pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
103 let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
116 && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
117 {
118 1
121 } else {
122 0
123 };
124
125 let Some(min_indent) = docs
135 .iter()
136 .map(|fragment| {
137 fragment
138 .doc
139 .as_str()
140 .lines()
141 .filter(|line| line.chars().any(|c| !c.is_whitespace()))
142 .map(|line| {
143 let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
146 whitespace
147 + (if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add })
148 })
149 .min()
150 .unwrap_or(usize::MAX)
151 })
152 .min()
153 else {
154 return;
155 };
156
157 for fragment in docs {
158 if fragment.doc == kw::Empty {
159 continue;
160 }
161
162 let indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
163 min_indent - add
164 } else {
165 min_indent
166 };
167
168 fragment.indent = indent;
169 }
170}
171
172pub fn add_doc_fragment(out: &mut String, frag: &DocFragment) {
178 if frag.doc == kw::Empty {
179 out.push('\n');
180 return;
181 }
182 let s = frag.doc.as_str();
183 let mut iter = s.lines();
184
185 while let Some(line) = iter.next() {
186 if line.chars().any(|c| !c.is_whitespace()) {
187 assert!(line.len() >= frag.indent);
188 out.push_str(&line[frag.indent..]);
189 } else {
190 out.push_str(line);
191 }
192 out.push('\n');
193 }
194}
195
196pub fn attrs_to_doc_fragments<'a, A: AttributeExt + Clone + 'a>(
197 attrs: impl Iterator<Item = (&'a A, Option<DefId>)>,
198 doc_only: bool,
199) -> (Vec<DocFragment>, ThinVec<A>) {
200 let mut doc_fragments = Vec::new();
201 let mut other_attrs = ThinVec::<A>::new();
202 for (attr, item_id) in attrs {
203 if let Some((doc_str, comment_kind)) = attr.doc_str_and_comment_kind() {
204 let doc = beautify_doc_string(doc_str, comment_kind);
205 let (span, kind) = if attr.is_doc_comment() {
206 (attr.span(), DocFragmentKind::SugaredDoc)
207 } else {
208 (
209 attr.value_span()
210 .map(|i| i.with_ctxt(attr.span().ctxt()))
211 .unwrap_or(attr.span()),
212 DocFragmentKind::RawDoc,
213 )
214 };
215 let fragment = DocFragment { span, doc, kind, item_id, indent: 0 };
216 doc_fragments.push(fragment);
217 } else if !doc_only {
218 other_attrs.push(attr.clone());
219 }
220 }
221
222 unindent_doc_fragments(&mut doc_fragments);
223
224 (doc_fragments, other_attrs)
225}
226
227pub fn prepare_to_doc_link_resolution(
233 doc_fragments: &[DocFragment],
234) -> FxIndexMap<Option<DefId>, String> {
235 let mut res = FxIndexMap::default();
236 for fragment in doc_fragments {
237 let out_str = res.entry(fragment.item_id).or_default();
238 add_doc_fragment(out_str, fragment);
239 }
240 res
241}
242
243pub fn main_body_opts() -> Options {
245 Options::ENABLE_TABLES
246 | Options::ENABLE_FOOTNOTES
247 | Options::ENABLE_STRIKETHROUGH
248 | Options::ENABLE_TASKLISTS
249 | Options::ENABLE_SMART_PUNCTUATION
250}
251
252fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<String, MalformedGenerics> {
253 let mut stripped_segment = String::new();
254 let mut param_depth = 0;
255
256 let mut latest_generics_chunk = String::new();
257
258 for c in segment {
259 if c == '<' {
260 param_depth += 1;
261 latest_generics_chunk.clear();
262 } else if c == '>' {
263 param_depth -= 1;
264 if latest_generics_chunk.contains(" as ") {
265 return Err(MalformedGenerics::HasFullyQualifiedSyntax);
268 }
269 } else if param_depth == 0 {
270 stripped_segment.push(c);
271 } else {
272 latest_generics_chunk.push(c);
273 }
274 }
275
276 if param_depth == 0 {
277 Ok(stripped_segment)
278 } else {
279 Err(MalformedGenerics::UnbalancedAngleBrackets)
281 }
282}
283
284pub fn strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGenerics> {
285 if !path_str.contains(['<', '>']) {
286 return Ok(path_str.into());
287 }
288 let mut stripped_segments = vec![];
289 let mut path = path_str.chars().peekable();
290 let mut segment = Vec::new();
291
292 while let Some(chr) = path.next() {
293 match chr {
294 ':' => {
295 if path.next_if_eq(&':').is_some() {
296 let stripped_segment =
297 strip_generics_from_path_segment(mem::take(&mut segment))?;
298 if !stripped_segment.is_empty() {
299 stripped_segments.push(stripped_segment);
300 }
301 } else {
302 return Err(MalformedGenerics::InvalidPathSeparator);
303 }
304 }
305 '<' => {
306 segment.push(chr);
307
308 match path.next() {
309 Some('<') => {
310 return Err(MalformedGenerics::TooManyAngleBrackets);
311 }
312 Some('>') => {
313 return Err(MalformedGenerics::EmptyAngleBrackets);
314 }
315 Some(chr) => {
316 segment.push(chr);
317
318 while let Some(chr) = path.next_if(|c| *c != '>') {
319 segment.push(chr);
320 }
321 }
322 None => break,
323 }
324 }
325 _ => segment.push(chr),
326 }
327 trace!("raw segment: {:?}", segment);
328 }
329
330 if !segment.is_empty() {
331 let stripped_segment = strip_generics_from_path_segment(segment)?;
332 if !stripped_segment.is_empty() {
333 stripped_segments.push(stripped_segment);
334 }
335 }
336
337 debug!("path_str: {path_str:?}\nstripped segments: {stripped_segments:?}");
338
339 let stripped_path = stripped_segments.join("::");
340
341 if !stripped_path.is_empty() {
342 Ok(stripped_path.into())
343 } else {
344 Err(MalformedGenerics::MissingType)
345 }
346}
347
348pub fn inner_docs(attrs: &[impl AttributeExt]) -> bool {
353 attrs.iter().find(|a| a.doc_str().is_some()).is_none_or(|a| a.style() == ast::AttrStyle::Inner)
354}
355
356pub fn has_primitive_or_keyword_docs(attrs: &[impl AttributeExt]) -> bool {
358 for attr in attrs {
359 if attr.has_name(sym::rustc_doc_primitive) {
360 return true;
361 } else if attr.has_name(sym::doc)
362 && let Some(items) = attr.meta_item_list()
363 {
364 for item in items {
365 if item.has_name(sym::keyword) {
366 return true;
367 }
368 }
369 }
370 }
371 false
372}
373
374fn preprocess_link(link: &str) -> Box<str> {
378 let link = link.replace('`', "");
379 let link = link.split('#').next().unwrap();
380 let link = link.trim();
381 let link = link.rsplit('@').next().unwrap();
382 let link = link.strip_suffix("()").unwrap_or(link);
383 let link = link.strip_suffix("{}").unwrap_or(link);
384 let link = link.strip_suffix("[]").unwrap_or(link);
385 let link = if link != "!" { link.strip_suffix('!').unwrap_or(link) } else { link };
386 let link = link.trim();
387 strip_generics_from_path(link).unwrap_or_else(|_| link.into())
388}
389
390pub fn may_be_doc_link(link_type: LinkType) -> bool {
393 match link_type {
394 LinkType::Inline
395 | LinkType::Reference
396 | LinkType::ReferenceUnknown
397 | LinkType::Collapsed
398 | LinkType::CollapsedUnknown
399 | LinkType::Shortcut
400 | LinkType::ShortcutUnknown => true,
401 LinkType::Autolink | LinkType::Email => false,
402 }
403}
404
405pub(crate) fn attrs_to_preprocessed_links<A: AttributeExt + Clone>(attrs: &[A]) -> Vec<Box<str>> {
408 let (doc_fragments, _) = attrs_to_doc_fragments(attrs.iter().map(|attr| (attr, None)), true);
409 let doc = prepare_to_doc_link_resolution(&doc_fragments).into_values().next().unwrap();
410
411 parse_links(&doc)
412}
413
414fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
417 let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
418 let mut event_iter = Parser::new_with_broken_link_callback(
419 doc,
420 main_body_opts(),
421 Some(&mut broken_link_callback),
422 );
423 let mut links = Vec::new();
424
425 while let Some(event) = event_iter.next() {
426 match event {
427 Event::Start(Tag::Link { link_type, dest_url, title: _, id: _ })
428 if may_be_doc_link(link_type) =>
429 {
430 if matches!(
431 link_type,
432 LinkType::Inline
433 | LinkType::ReferenceUnknown
434 | LinkType::Reference
435 | LinkType::Shortcut
436 | LinkType::ShortcutUnknown
437 ) {
438 if let Some(display_text) = collect_link_data(&mut event_iter) {
439 links.push(display_text);
440 }
441 }
442
443 links.push(preprocess_link(&dest_url));
444 }
445 _ => {}
446 }
447 }
448
449 links
450}
451
452fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
454 event_iter: &mut Parser<'input, F>,
455) -> Option<Box<str>> {
456 let mut display_text: Option<String> = None;
457 let mut append_text = |text: CowStr<'_>| {
458 if let Some(display_text) = &mut display_text {
459 display_text.push_str(&text);
460 } else {
461 display_text = Some(text.to_string());
462 }
463 };
464
465 while let Some(event) = event_iter.next() {
466 match event {
467 Event::Text(text) => {
468 append_text(text);
469 }
470 Event::Code(code) => {
471 append_text(code);
472 }
473 Event::End(_) => {
474 break;
475 }
476 _ => {}
477 }
478 }
479
480 display_text.map(String::into_boxed_str)
481}
482
483pub fn span_of_fragments(fragments: &[DocFragment]) -> Option<Span> {
485 if fragments.is_empty() {
486 return None;
487 }
488 let start = fragments[0].span;
489 if start == DUMMY_SP {
490 return None;
491 }
492 let end = fragments.last().expect("no doc strings provided").span;
493 Some(start.to(end))
494}
495
496pub fn source_span_for_markdown_range(
507 tcx: TyCtxt<'_>,
508 markdown: &str,
509 md_range: &Range<usize>,
510 fragments: &[DocFragment],
511) -> Option<Span> {
512 if let &[fragment] = &fragments
513 && fragment.kind == DocFragmentKind::RawDoc
514 && let Ok(snippet) = tcx.sess.source_map().span_to_snippet(fragment.span)
515 && snippet.trim_end() == markdown.trim_end()
516 && let Ok(md_range_lo) = u32::try_from(md_range.start)
517 && let Ok(md_range_hi) = u32::try_from(md_range.end)
518 {
519 return Some(Span::new(
521 fragment.span.lo() + rustc_span::BytePos(md_range_lo),
522 fragment.span.lo() + rustc_span::BytePos(md_range_hi),
523 fragment.span.ctxt(),
524 fragment.span.parent(),
525 ));
526 }
527
528 let is_all_sugared_doc = fragments.iter().all(|frag| frag.kind == DocFragmentKind::SugaredDoc);
529
530 if !is_all_sugared_doc {
531 return None;
532 }
533
534 let snippet = tcx.sess.source_map().span_to_snippet(span_of_fragments(fragments)?).ok()?;
535
536 let starting_line = markdown[..md_range.start].matches('\n').count();
537 let ending_line = starting_line + markdown[md_range.start..md_range.end].matches('\n').count();
538
539 let mut src_lines = snippet.split_terminator('\n');
542 let md_lines = markdown.split_terminator('\n');
543
544 let mut start_bytes = 0;
547 let mut end_bytes = 0;
548
549 'outer: for (line_no, md_line) in md_lines.enumerate() {
550 loop {
551 let source_line = src_lines.next()?;
552 match source_line.find(md_line) {
553 Some(offset) => {
554 if line_no == starting_line {
555 start_bytes += offset;
556
557 if starting_line == ending_line {
558 break 'outer;
559 }
560 } else if line_no == ending_line {
561 end_bytes += offset;
562 break 'outer;
563 } else if line_no < starting_line {
564 start_bytes += source_line.len() - md_line.len();
565 } else {
566 end_bytes += source_line.len() - md_line.len();
567 }
568 break;
569 }
570 None => {
571 if line_no <= starting_line {
574 start_bytes += source_line.len() + 1;
575 } else {
576 end_bytes += source_line.len() + 1;
577 }
578 }
579 }
580 }
581 }
582
583 Some(span_of_fragments(fragments)?.from_inner(InnerSpan::new(
584 md_range.start + start_bytes,
585 md_range.end + start_bytes + end_bytes,
586 )))
587}