rustdoc/passes/lint/bare_urls.rs

//! Detects links that are not linkified, e.g., in Markdown such as `Go to https://example.com/.`
//! Suggests wrapping the link with angle brackets: `Go to <https://example.com/>.` to linkify it.
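//!
//! For example (an illustrative sketch, not a doctest), a doc comment such as
//!
//! ```text
//! /// See https://example.com/guide for details.
//! ```
//!
//! triggers the lint and carries a machine-applicable suggestion to write
//!
//! ```text
//! /// See <https://example.com/guide> for details.
//! ```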

use core::ops::Range;
use std::mem;
use std::sync::LazyLock;

use pulldown_cmark::{Event, Parser, Tag};
use regex::Regex;
use rustc_errors::Applicability;
use rustc_hir::HirId;
use rustc_resolve::rustdoc::source_span_for_markdown_range;
use tracing::trace;

use crate::clean::*;
use crate::core::DocContext;
use crate::html::markdown::main_body_opts;

pub(super) fn visit_item(cx: &DocContext<'_>, item: &Item, hir_id: HirId, dox: &str) {
    let report_diag = |cx: &DocContext<'_>, msg: &'static str, range: Range<usize>| {
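        // Map the markdown range back to a precise source span; if that is not
        // possible, fall back to the span of the item's doc attribute(s).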
        let sp = source_span_for_markdown_range(cx.tcx, dox, &range, &item.attrs.doc_strings)
            .unwrap_or_else(|| item.attr_span(cx.tcx));
        cx.tcx.node_span_lint(crate::lint::BARE_URLS, hir_id, sp, |lint| {
            lint.primary_message(msg)
                .note("bare URLs are not automatically turned into clickable links")
                .multipart_suggestion(
                    "use an automatic link instead",
                    vec![
                        (sp.shrink_to_lo(), "<".to_string()),
                        (sp.shrink_to_hi(), ">".to_string()),
                    ],
                    Applicability::MachineApplicable,
                );
        });
    };

    let mut p = Parser::new_ext(dox, main_body_opts()).into_offset_iter();

    while let Some((event, range)) = p.next() {
        match event {
            Event::Text(s) => find_raw_urls(cx, &s, range, &report_diag),
            // We don't want to check the text inside code blocks or links.
            Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link { .. })) => {
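                // Skip ahead to the matching `End` event; comparing
                // discriminants ignores whatever payload the end tag carries.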
                for (event, _) in p.by_ref() {
                    match event {
                        Event::End(end)
                            if mem::discriminant(&end) == mem::discriminant(&tag.to_end()) =>
                        {
                            break;
                        }
                        _ => {}
                    }
                }
            }
            _ => {}
        }
    }
}

static URL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(concat!(
        r"https?://",                          // url scheme
        r"([-a-zA-Z0-9@:%._\+~#=]{2,256}\.)+", // one or more subdomains
        r"[a-zA-Z]{2,63}",                     // root domain
        r"\b([-a-zA-Z0-9@:%_\+.~#?&/=]*)"      // optional query or url fragments
    ))
    .expect("failed to build regex")
});

fn find_raw_urls(
    cx: &DocContext<'_>,
    text: &str,
    range: Range<usize>,
    f: &impl Fn(&DocContext<'_>, &'static str, Range<usize>),
) {
    trace!("looking for raw urls in {text}");
    // For now, we only check "full" URLs (meaning, starting with "http://" or "https://").
    for match_ in URL_REGEX.find_iter(text) {
        let url_range = match_.range();
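        // `url_range` is relative to this text event, so shift it by the
        // event's start offset to get a range into the full doc string.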
        f(
            cx,
            "this URL is not a hyperlink",
            Range { start: range.start + url_range.start, end: range.start + url_range.end },
        );
    }
}
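
// A minimal sketch of sanity checks for `URL_REGEX` (illustrative: the test
// module, its name, and the sample URLs are assumptions).
#[cfg(test)]
mod tests {
    use super::URL_REGEX;

    #[test]
    fn url_regex_requires_an_explicit_scheme() {
        // Full URLs with an `http://` or `https://` scheme are picked up...
        assert!(URL_REGEX.is_match("https://example.com"));
        assert!(URL_REGEX.is_match("http://example.com/path?q=1"));
        // ...while scheme-less or incomplete URLs are ignored.
        assert!(!URL_REGEX.is_match("example.com"));
        assert!(!URL_REGEX.is_match("https://"));
    }
}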