rustdoc/passes/lint/
unescaped_backticks.rs

1//! Detects unescaped backticks (\`) in doc comments.
2
3use std::ops::Range;
4
5use pulldown_cmark::{BrokenLink, Event, Parser};
6use rustc_errors::Diag;
7use rustc_hir::HirId;
8use rustc_lint_defs::Applicability;
9use rustc_resolve::rustdoc::source_span_for_markdown_range;
10
11use crate::clean::Item;
12use crate::core::DocContext;
13use crate::html::markdown::main_body_opts;
14
/// Lints the doc string `dox` of `item` for stray (unescaped) backticks.
///
/// The markdown is parsed into an offset-annotated event stream. One [`Element`] is kept on a
/// stack for every currently open tag. A text event that consists of exactly one literal,
/// unescaped backtick is recorded on the innermost open element; when that element ends, an
/// `UNESCAPED_BACKTICKS` lint is emitted at the backtick, together with suggestions on where a
/// missing opening/closing backtick could go and a suggestion to escape the backtick.
///
/// Inline code events seen inside an element are inspected as well, because a stray backtick
/// is often caused by a *previous* inline code that starts too late or ends too late.
pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item, hir_id: HirId, dox: &str) {
    let tcx = cx.tcx;

    // Resolve references that the parser reports as broken against the links known for this
    // item, returning the matching link's href and replacement text if there is one.
    let link_names = item.link_names(&cx.cache);
    let mut replacer = |broken_link: BrokenLink<'_>| {
        link_names
            .iter()
            .find(|link| *link.original_text == *broken_link.reference)
            .map(|link| ((*link.href).into(), (*link.new_text).into()))
    };
    let parser = Parser::new_with_broken_link_callback(dox, main_body_opts(), Some(&mut replacer))
        .into_offset_iter();

    // One entry per open tag; pushed on `Start`, popped on `End`.
    let mut element_stack = Vec::new();

    // End offset of the most recent text event, used below to tell whether a backslash in
    // front of a text event belongs to a preceding text node or is an escape.
    let mut prev_text_end = 0;
    for (event, event_range) in parser {
        match event {
            Event::Start(_) => {
                element_stack.push(Element::new(event_range));
            }
            Event::End(_) => {
                let element = element_stack.pop().unwrap();

                // Nothing to report unless a stray backtick was recorded for this element.
                let Some(backtick_index) = element.backtick_index else {
                    continue;
                };

                // If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute,
                // use the span of the entire attribute as a fallback.
                let span = source_span_for_markdown_range(
                    tcx,
                    dox,
                    &(backtick_index..backtick_index + 1),
                    &item.attrs.doc_strings,
                )
                .unwrap_or_else(|| item.attr_span(tcx));

                tcx.node_span_lint(crate::lint::UNESCAPED_BACKTICKS, hir_id, span, |lint| {
                    lint.primary_message("unescaped backtick");

                    // Tracks whether any concrete insertion was suggested, so that a generic
                    // help message can be emitted otherwise.
                    let mut help_emitted = false;

                    // First, suggestions derived from a suspicious earlier inline code.
                    match element.prev_code_guess {
                        PrevCodeGuess::None => {}
                        PrevCodeGuess::Start { guess, .. } => {
                            // "foo` `bar`" -> "`foo` `bar`"
                            if let Some(suggest_index) =
                                clamp_start(guess, &element.suggestible_ranges)
                                && can_suggest_backtick(dox, suggest_index)
                            {
                                suggest_insertion(
                                    cx,
                                    item,
                                    dox,
                                    lint,
                                    suggest_index,
                                    '`',
                                    "the opening backtick of a previous inline code may be missing",
                                );
                                help_emitted = true;
                            }
                        }
                        PrevCodeGuess::End { guess, .. } => {
                            // "`foo `bar`" -> "`foo` `bar`"
                            // Don't `clamp_end` here, because the suggestion is guaranteed to be inside
                            // an inline code node and we intentionally "break" the inline code here.
                            let suggest_index = guess;
                            if can_suggest_backtick(dox, suggest_index) {
                                suggest_insertion(
                                    cx,
                                    item,
                                    dox,
                                    lint,
                                    suggest_index,
                                    '`',
                                    "a previous inline code might be longer than expected",
                                );
                                help_emitted = true;
                            }
                        }
                    }

                    // Unless the guess about the previous inline code is confident, also
                    // suggest completing an inline code around the stray backtick itself.
                    if !element.prev_code_guess.is_confident() {
                        // "`foo` bar`" -> "`foo` `bar`"
                        if let Some(guess) =
                            guess_start_of_code(dox, element.element_range.start..backtick_index)
                            && let Some(suggest_index) =
                                clamp_start(guess, &element.suggestible_ranges)
                            && can_suggest_backtick(dox, suggest_index)
                        {
                            suggest_insertion(
                                cx,
                                item,
                                dox,
                                lint,
                                suggest_index,
                                '`',
                                "the opening backtick of an inline code may be missing",
                            );
                            help_emitted = true;
                        }

                        // "`foo` `bar" -> "`foo` `bar`"
                        // Don't suggest closing backtick after single trailing char,
                        // if we already suggested opening backtick. For example:
                        // "foo`." -> "`foo`." or "foo`s" -> "`foo`s".
                        if let Some(guess) =
                            guess_end_of_code(dox, backtick_index + 1..element.element_range.end)
                            && let Some(suggest_index) =
                                clamp_end(guess, &element.suggestible_ranges)
                            && can_suggest_backtick(dox, suggest_index)
                            && (!help_emitted || suggest_index - backtick_index > 2)
                        {
                            suggest_insertion(
                                cx,
                                item,
                                dox,
                                lint,
                                suggest_index,
                                '`',
                                "the closing backtick of an inline code may be missing",
                            );
                            help_emitted = true;
                        }
                    }

                    // No concrete position could be suggested: fall back to a generic hint.
                    if !help_emitted {
                        lint.help(
                            "the opening or closing backtick of an inline code may be missing",
                        );
                    }

                    // Always offer escaping the backtick (inserting `\` right before it).
                    suggest_insertion(
                        cx,
                        item,
                        dox,
                        lint,
                        backtick_index,
                        '\\',
                        "if you meant to use a literal backtick, escape it",
                    );
                });
            }
            Event::Code(_) => {
                let element = element_stack
                    .last_mut()
                    .expect("expected inline code node to be inside of an element");
                assert!(
                    event_range.start >= element.element_range.start
                        && event_range.end <= element.element_range.end
                );

                // This inline code might be longer than it's supposed to be.
                // Only check single backtick inline code for now.
                if !element.prev_code_guess.is_confident()
                    && dox.as_bytes().get(event_range.start) == Some(&b'`')
                    && dox.as_bytes().get(event_range.start + 1) != Some(&b'`')
                {
                    // The code text without its single opening and closing backtick.
                    let range_inside = event_range.start + 1..event_range.end - 1;
                    let text_inside = &dox[range_inside.clone()];

                    // Inline code with leading or trailing whitespace is treated as a
                    // confident sign that the backticks are misplaced.
                    let is_confident = text_inside.starts_with(char::is_whitespace)
                        || text_inside.ends_with(char::is_whitespace);

                    if let Some(guess) = guess_end_of_code(dox, range_inside) {
                        // Find earlier end of code.
                        element.prev_code_guess = PrevCodeGuess::End { guess, is_confident };
                    } else {
                        // Find alternate start of code.
                        let range_before = element.element_range.start..event_range.start;
                        if let Some(guess) = guess_start_of_code(dox, range_before) {
                            element.prev_code_guess = PrevCodeGuess::Start { guess, is_confident };
                        }
                    }
                }
            }
            Event::Text(text) => {
                let element = element_stack
                    .last_mut()
                    .expect("expected inline text node to be inside of an element");
                assert!(
                    event_range.start >= element.element_range.start
                        && event_range.end <= element.element_range.end
                );

                // The first char is escaped if the prev char is \ and not part of a text node.
                let is_escaped = prev_text_end < event_range.start
                    && dox.as_bytes()[event_range.start - 1] == b'\\';

                // Don't lint backslash-escaped (\`) or html-escaped (&#96;) backticks.
                // (Comparing against the raw source slice filters out the html-escaped form,
                // whose source text is not a literal backtick.)
                if *text == *"`" && !is_escaped && *text == dox[event_range.clone()] {
                    // We found a stray backtick.
                    assert!(
                        element.backtick_index.is_none(),
                        "expected at most one unescaped backtick per element",
                    );
                    element.backtick_index = Some(event_range.start);
                }

                prev_text_end = event_range.end;

                // Every text event contributes a range in which backticks may be suggested.
                if is_escaped {
                    // Ensure that we suggest "`\x" and not "\`x".
                    element.suggestible_ranges.push(event_range.start - 1..event_range.end);
                } else {
                    element.suggestible_ranges.push(event_range);
                }
            }
            // All other events carry no information this lint needs.
            _ => {}
        }
    }
}
228
/// A guess about an earlier inline code node in the same element that looks wrong.
///
/// `guess` is the byte position at which a \` would be suggested. The guess counts as
/// confident (`is_confident`) when the inline code starts or ends with whitespace.
#[derive(Debug)]
enum PrevCodeGuess {
    /// No suspicious inline code has been seen.
    None,

    /// The opening \` seems to be missing.
    ///
    /// ```markdown
    /// foo` `bar`
    /// ```
    Start {
        guess: usize,
        is_confident: bool,
    },

    /// The closing \` seems to be missing.
    ///
    /// ```markdown
    /// `foo `bar`
    /// ```
    End {
        guess: usize,
        is_confident: bool,
    },
}

impl PrevCodeGuess {
    /// Returns `true` only for a `Start` or `End` guess that was flagged as confident;
    /// `None` is never confident.
    fn is_confident(&self) -> bool {
        matches!(
            self,
            PrevCodeGuess::Start { is_confident: true, .. }
                | PrevCodeGuess::End { is_confident: true, .. }
        )
    }
}
268
269/// A markdown [tagged element], which may or may not contain an unescaped backtick.
270///
271/// [tagged element]: https://docs.rs/pulldown-cmark/0.9/pulldown_cmark/enum.Tag.html
272#[derive(Debug)]
273struct Element {
274    /// The full range (span) of the element in the doc string.
275    element_range: Range<usize>,
276
277    /// The ranges where we're allowed to put backticks.
278    /// This is used to prevent breaking markdown elements like links or lists.
279    suggestible_ranges: Vec<Range<usize>>,
280
281    /// The unescaped backtick.
282    backtick_index: Option<usize>,
283
284    /// Suggest a different start or end of an inline code.
285    prev_code_guess: PrevCodeGuess,
286}
287
288impl Element {
289    const fn new(element_range: Range<usize>) -> Self {
290        Self {
291            element_range,
292            suggestible_ranges: Vec::new(),
293            backtick_index: None,
294            prev_code_guess: PrevCodeGuess::None,
295        }
296    }
297}
298
299/// Given a potentially unclosed inline code, attempt to find the start.
300fn guess_start_of_code(dox: &str, range: Range<usize>) -> Option<usize> {
301    assert!(dox.as_bytes()[range.end] == b'`');
302
303    let mut braces = 0;
304    let mut guess = 0;
305    for (idx, ch) in dox[range.clone()].char_indices().rev() {
306        match ch {
307            ')' | ']' | '}' => braces += 1,
308            '(' | '[' | '{' => {
309                if braces == 0 {
310                    guess = idx + 1;
311                    break;
312                }
313                braces -= 1;
314            }
315            ch if ch.is_whitespace() && braces == 0 => {
316                guess = idx + 1;
317                break;
318            }
319            _ => (),
320        }
321    }
322
323    guess += range.start;
324
325    // Don't suggest empty inline code or duplicate backticks.
326    can_suggest_backtick(dox, guess).then_some(guess)
327}
328
329/// Given a potentially unclosed inline code, attempt to find the end.
330fn guess_end_of_code(dox: &str, range: Range<usize>) -> Option<usize> {
331    // Punctuation that should be outside of the inline code.
332    const TRAILING_PUNCTUATION: &[u8] = b".,";
333
334    assert!(dox.as_bytes()[range.start - 1] == b'`');
335
336    let text = dox[range.clone()].trim_end();
337    let mut braces = 0;
338    let mut guess = text.len();
339    for (idx, ch) in text.char_indices() {
340        match ch {
341            '(' | '[' | '{' => braces += 1,
342            ')' | ']' | '}' => {
343                if braces == 0 {
344                    guess = idx;
345                    break;
346                }
347                braces -= 1;
348            }
349            ch if ch.is_whitespace() && braces == 0 => {
350                guess = idx;
351                break;
352            }
353            _ => (),
354        }
355    }
356
357    // Strip a single trailing punctuation.
358    if guess >= 1
359        && TRAILING_PUNCTUATION.contains(&text.as_bytes()[guess - 1])
360        && (guess < 2 || !TRAILING_PUNCTUATION.contains(&text.as_bytes()[guess - 2]))
361    {
362        guess -= 1;
363    }
364
365    guess += range.start;
366
367    // Don't suggest empty inline code or duplicate backticks.
368    can_suggest_backtick(dox, guess).then_some(guess)
369}
370
/// Returns whether a backtick inserted at `dox[index]` would avoid forming a double backtick,
/// i.e. whether neither the byte before nor the byte at `index` is already a backtick.
fn can_suggest_backtick(dox: &str, index: usize) -> bool {
    let bytes = dox.as_bytes();
    let backtick_before = index > 0 && bytes[index - 1] == b'`';
    let backtick_after = index < bytes.len() && bytes[index] == b'`';
    !(backtick_before || backtick_after)
}
376
/// Moves `index` forward, if necessary, so that it lies inside (or one past the end of) one of
/// the `ranges`. Returns `None` when `index` is beyond the end of every range.
///
/// The ranges must be sorted for this to work correctly.
fn clamp_start(index: usize, ranges: &[Range<usize>]) -> Option<usize> {
    ranges.iter().find_map(|range| {
        if index <= range.start {
            // The index lies before this range: snap forward to its start.
            Some(range.start)
        } else if index <= range.end {
            Some(index)
        } else {
            None
        }
    })
}
391
/// Moves `index` backward, if necessary, so that it lies inside (or one past the end of) one
/// of the `ranges`. Returns `None` when `index` is before the start of every range.
///
/// The ranges must be sorted for this to work correctly.
fn clamp_end(index: usize, ranges: &[Range<usize>]) -> Option<usize> {
    ranges.iter().rev().find_map(|range| {
        if index >= range.end {
            // The index lies at or past the end of this range: snap back to its end.
            Some(range.end)
        } else if index >= range.start {
            Some(index)
        } else {
            None
        }
    })
}
406
407/// Try to emit a span suggestion and fall back to help messages if we can't find a suitable span.
408///
409/// This helps finding backticks in huge macro-generated docs.
410fn suggest_insertion(
411    cx: &DocContext<'_>,
412    item: &Item,
413    dox: &str,
414    lint: &mut Diag<'_, ()>,
415    insert_index: usize,
416    suggestion: char,
417    message: &'static str,
418) {
419    /// Maximum bytes of context to show around the insertion.
420    const CONTEXT_MAX_LEN: usize = 80;
421
422    if let Some(span) = source_span_for_markdown_range(
423        cx.tcx,
424        dox,
425        &(insert_index..insert_index),
426        &item.attrs.doc_strings,
427    ) {
428        lint.span_suggestion(span, message, suggestion, Applicability::MaybeIncorrect);
429    } else {
430        let line_start = dox[..insert_index].rfind('\n').map_or(0, |idx| idx + 1);
431        let line_end = dox[insert_index..].find('\n').map_or(dox.len(), |idx| idx + insert_index);
432
433        let context_before_max_len = if insert_index - line_start < CONTEXT_MAX_LEN / 2 {
434            insert_index - line_start
435        } else if line_end - insert_index < CONTEXT_MAX_LEN / 2 {
436            CONTEXT_MAX_LEN - (line_end - insert_index)
437        } else {
438            CONTEXT_MAX_LEN / 2
439        };
440        let context_after_max_len = CONTEXT_MAX_LEN - context_before_max_len;
441
442        let (prefix, context_start) = if insert_index - line_start <= context_before_max_len {
443            ("", line_start)
444        } else {
445            ("...", dox.ceil_char_boundary(insert_index - context_before_max_len))
446        };
447        let (suffix, context_end) = if line_end - insert_index <= context_after_max_len {
448            ("", line_end)
449        } else {
450            ("...", dox.floor_char_boundary(insert_index + context_after_max_len))
451        };
452
453        let context_full = &dox[context_start..context_end].trim_end();
454        let context_before = &dox[context_start..insert_index];
455        let context_after = &dox[insert_index..context_end].trim_end();
456        lint.help(format!(
457            "{message}\n change: {prefix}{context_full}{suffix}\nto this: {prefix}{context_before}{suggestion}{context_after}{suffix}"
458        ));
459    }
460}