Skip to main content

rustdoc/passes/lint/
bare_urls.rs

1//! Detects links that are not linkified, e.g., in Markdown such as `Go to https://example.com/.`
2//! Suggests wrapping the link with angle brackets: `Go to <https://example.com/>.` to linkify it.
3
4use core::ops::Range;
5use std::mem;
6use std::sync::LazyLock;
7
8use regex::Regex;
9use rustc_errors::{Applicability, DiagDecorator};
10use rustc_hir::HirId;
11use rustc_resolve::rustdoc::pulldown_cmark::{Event, Parser, Tag};
12use rustc_resolve::rustdoc::source_span_for_markdown_range;
13use tracing::trace;
14
15use crate::clean::*;
16use crate::core::DocContext;
17use crate::html::markdown::main_body_opts;
18
19pub(super) fn visit_item(cx: &DocContext<'_>, item: &Item, hir_id: HirId, dox: &str) {
20    let report_diag = |cx: &DocContext<'_>,
21                       msg: &'static str,
22                       range: Range<usize>,
23                       without_brackets: Option<&str>| {
24        let maybe_sp = source_span_for_markdown_range(cx.tcx, dox, &range, &item.attrs.doc_strings)
25            .map(|(sp, _)| sp);
26        let sp = maybe_sp.unwrap_or_else(|| item.attr_span(cx.tcx));
27        cx.tcx.emit_node_span_lint(
28            crate::lint::BARE_URLS,
29            hir_id,
30            sp,
31            DiagDecorator(|lint| {
32                lint.primary_message(msg)
33                    .note("bare URLs are not automatically turned into clickable links");
34                // The fallback of using the attribute span is suitable for
35                // highlighting where the error is, but not for placing the < and >
36                if let Some(sp) = maybe_sp {
37                    if let Some(without_brackets) = without_brackets {
38                        lint.multipart_suggestion(
39                            "use an automatic link instead",
40                            vec![(sp, format!("<{without_brackets}>"))],
41                            Applicability::MachineApplicable,
42                        );
43                    } else {
44                        lint.multipart_suggestion(
45                            "use an automatic link instead",
46                            vec![
47                                (sp.shrink_to_lo(), "<".to_string()),
48                                (sp.shrink_to_hi(), ">".to_string()),
49                            ],
50                            Applicability::MachineApplicable,
51                        );
52                    }
53                }
54            }),
55        );
56    };
57
58    let mut p = Parser::new_ext(dox, main_body_opts()).into_offset_iter();
59
60    while let Some((event, range)) = p.next() {
61        match event {
62            Event::Text(s) => find_raw_urls(cx, dox, &s, range, &report_diag),
63            // We don't want to check the text inside code blocks or links.
64            Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link { .. })) => {
65                for (event, _) in p.by_ref() {
66                    match event {
67                        Event::End(end)
68                            if mem::discriminant(&end) == mem::discriminant(&tag.to_end()) =>
69                        {
70                            break;
71                        }
72                        _ => {}
73                    }
74                }
75            }
76            _ => {}
77        }
78    }
79}
80
81static URL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
82    Regex::new(concat!(
83        r"https?://",                          // url scheme
84        r"([-a-zA-Z0-9@:%._\+~#=]{2,256}\.)+", // one or more subdomains
85        r"[a-zA-Z]{2,63}",                     // root domain
86        r"\b([-a-zA-Z0-9@:%_\+.~#?&/=]*)",     // optional query or url fragments
87    ))
88    .expect("failed to build regex")
89});
90
91fn find_raw_urls(
92    cx: &DocContext<'_>,
93    dox: &str,
94    text: &str,
95    range: Range<usize>,
96    f: &impl Fn(&DocContext<'_>, &'static str, Range<usize>, Option<&str>),
97) {
98    trace!("looking for raw urls in {text}");
99    // For now, we only check "full" URLs (meaning, starting with "http://" or "https://").
100    for match_ in URL_REGEX.find_iter(text) {
101        let mut url_range = match_.range();
102        url_range.start += range.start;
103        url_range.end += range.start;
104        let mut without_brackets = None;
105        // If the link is contained inside `[]`, then we need to replace the brackets and
106        // not just add `<>`.
107        if dox[..url_range.start].ends_with('[')
108            && url_range.end <= dox.len()
109            && dox[url_range.end..].starts_with(']')
110        {
111            url_range.start -= 1;
112            url_range.end += 1;
113            without_brackets = Some(match_.as_str());
114        }
115        f(cx, "this URL is not a hyperlink", url_range, without_brackets);
116    }
117}