rustc_parse/lexer/
unescape_error_reporting.rs

1//! Utilities for rendering escape sequence errors as diagnostics.
2
3use std::iter::once;
4use std::ops::Range;
5
6use rustc_errors::{Applicability, DiagCtxtHandle, ErrorGuaranteed};
7use rustc_lexer::unescape::{EscapeError, Mode};
8use rustc_span::{BytePos, Span};
9use tracing::debug;
10
11use crate::errors::{MoreThanOneCharNote, MoreThanOneCharSugg, NoBraceUnicodeSub, UnescapeError};
12
13pub(crate) fn emit_unescape_error(
14    dcx: DiagCtxtHandle<'_>,
15    // interior part of the literal, between quotes
16    lit: &str,
17    // full span of the literal, including quotes and any prefix
18    full_lit_span: Span,
19    // span of the error part of the literal
20    err_span: Span,
21    mode: Mode,
22    // range of the error inside `lit`
23    range: Range<usize>,
24    error: EscapeError,
25) -> Option<ErrorGuaranteed> {
26    debug!(
27        "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
28        lit, full_lit_span, mode, range, error
29    );
30    let last_char = || {
31        let c = lit[range.clone()].chars().next_back().unwrap();
32        let span = err_span.with_lo(err_span.hi() - BytePos(c.len_utf8() as u32));
33        (c, span)
34    };
35    Some(match error {
36        EscapeError::LoneSurrogateUnicodeEscape => {
37            dcx.emit_err(UnescapeError::InvalidUnicodeEscape { span: err_span, surrogate: true })
38        }
39        EscapeError::OutOfRangeUnicodeEscape => {
40            dcx.emit_err(UnescapeError::InvalidUnicodeEscape { span: err_span, surrogate: false })
41        }
42        EscapeError::MoreThanOneChar => {
43            use unicode_normalization::UnicodeNormalization;
44            use unicode_normalization::char::is_combining_mark;
45            let mut sugg = None;
46            let mut note = None;
47
48            let lit_chars = lit.chars().collect::<Vec<_>>();
49            let (first, rest) = lit_chars.split_first().unwrap();
50            if rest.iter().copied().all(is_combining_mark) {
51                let normalized = lit.nfc().to_string();
52                if normalized.chars().count() == 1 {
53                    let ch = normalized.chars().next().unwrap().escape_default().to_string();
54                    sugg = Some(MoreThanOneCharSugg::NormalizedForm {
55                        span: err_span,
56                        ch,
57                        normalized,
58                    });
59                }
60                let escaped_marks =
61                    rest.iter().map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
62                note = Some(MoreThanOneCharNote::AllCombining {
63                    span: err_span,
64                    chr: format!("{first}"),
65                    len: escaped_marks.len(),
66                    escaped_marks: escaped_marks.join(""),
67                });
68            } else {
69                let printable: Vec<char> = lit
70                    .chars()
71                    .filter(|&x| {
72                        unicode_width::UnicodeWidthChar::width(x).unwrap_or(0) != 0
73                            && !x.is_whitespace()
74                    })
75                    .collect();
76
77                if let &[ch] = printable.as_slice() {
78                    sugg = Some(MoreThanOneCharSugg::RemoveNonPrinting {
79                        span: err_span,
80                        ch: ch.to_string(),
81                    });
82                    note = Some(MoreThanOneCharNote::NonPrinting {
83                        span: err_span,
84                        escaped: lit.escape_default().to_string(),
85                    });
86                }
87            };
88            let sugg = sugg.unwrap_or_else(|| {
89                let prefix = mode.prefix_noraw();
90                let mut escaped = String::with_capacity(lit.len());
91                let mut in_escape = false;
92                for c in lit.chars() {
93                    match c {
94                        '\\' => in_escape = !in_escape,
95                        '"' if !in_escape => escaped.push('\\'),
96                        _ => in_escape = false,
97                    }
98                    escaped.push(c);
99                }
100                if escaped.len() != lit.len() || full_lit_span.is_empty() {
101                    let sugg = format!("{prefix}\"{escaped}\"");
102                    MoreThanOneCharSugg::QuotesFull {
103                        span: full_lit_span,
104                        is_byte: mode == Mode::Byte,
105                        sugg,
106                    }
107                } else {
108                    MoreThanOneCharSugg::Quotes {
109                        start: full_lit_span
110                            .with_hi(full_lit_span.lo() + BytePos((prefix.len() + 1) as u32)),
111                        end: full_lit_span.with_lo(full_lit_span.hi() - BytePos(1)),
112                        is_byte: mode == Mode::Byte,
113                        prefix,
114                    }
115                }
116            });
117            dcx.emit_err(UnescapeError::MoreThanOneChar {
118                span: full_lit_span,
119                note,
120                suggestion: sugg,
121            })
122        }
123        EscapeError::EscapeOnlyChar => {
124            let (c, char_span) = last_char();
125            dcx.emit_err(UnescapeError::EscapeOnlyChar {
126                span: err_span,
127                char_span,
128                escaped_sugg: c.escape_default().to_string(),
129                escaped_msg: escaped_char(c),
130                byte: mode == Mode::Byte,
131            })
132        }
133        EscapeError::BareCarriageReturn => {
134            let double_quotes = mode.in_double_quotes();
135            dcx.emit_err(UnescapeError::BareCr { span: err_span, double_quotes })
136        }
137        EscapeError::BareCarriageReturnInRawString => {
138            assert!(mode.in_double_quotes());
139            dcx.emit_err(UnescapeError::BareCrRawString(err_span))
140        }
141        EscapeError::InvalidEscape => {
142            let (c, span) = last_char();
143
144            let label = if mode == Mode::Byte || mode == Mode::ByteStr {
145                "unknown byte escape"
146            } else {
147                "unknown character escape"
148            };
149            let ec = escaped_char(c);
150            let mut diag = dcx.struct_span_err(span, format!("{label}: `{ec}`"));
151            diag.span_label(span, label);
152            if c == '{' || c == '}' && matches!(mode, Mode::Str | Mode::RawStr) {
153                diag.help(
154                    "if used in a formatting string, curly braces are escaped with `{{` and `}}`",
155                );
156            } else if c == '\r' {
157                diag.help(
158                    "this is an isolated carriage return; consider checking your editor and \
159                     version control settings",
160                );
161            } else {
162                if mode == Mode::Str || mode == Mode::Char {
163                    diag.span_suggestion(
164                        full_lit_span,
165                        "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal",
166                        format!("r\"{lit}\""),
167                        Applicability::MaybeIncorrect,
168                    );
169                }
170
171                diag.help(
172                    "for more information, visit \
173                     <https://doc.rust-lang.org/reference/tokens.html#literals>",
174                );
175            }
176            diag.emit()
177        }
178        EscapeError::TooShortHexEscape => dcx.emit_err(UnescapeError::TooShortHexEscape(err_span)),
179        EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
180            let (c, span) = last_char();
181            let is_hex = error == EscapeError::InvalidCharInHexEscape;
182            let ch = escaped_char(c);
183            dcx.emit_err(UnescapeError::InvalidCharInEscape { span, is_hex, ch })
184        }
185        EscapeError::NonAsciiCharInByte => {
186            let (c, span) = last_char();
187            let desc = match mode {
188                Mode::Byte => "byte literal",
189                Mode::ByteStr => "byte string literal",
190                Mode::RawByteStr => "raw byte string literal",
191                _ => panic!("non-is_byte literal paired with NonAsciiCharInByte"),
192            };
193            let mut err = dcx.struct_span_err(span, format!("non-ASCII character in {desc}"));
194            let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
195                format!(" but is {c:?}")
196            } else {
197                String::new()
198            };
199            err.span_label(span, format!("must be ASCII{postfix}"));
200            // Note: the \\xHH suggestions are not given for raw byte string
201            // literals, because they are araw and so cannot use any escapes.
202            if (c as u32) <= 0xFF && mode != Mode::RawByteStr {
203                err.span_suggestion(
204                    span,
205                    format!(
206                        "if you meant to use the unicode code point for {c:?}, use a \\xHH escape"
207                    ),
208                    format!("\\x{:X}", c as u32),
209                    Applicability::MaybeIncorrect,
210                );
211            } else if mode == Mode::Byte {
212                err.span_label(span, "this multibyte character does not fit into a single byte");
213            } else if mode != Mode::RawByteStr {
214                let mut utf8 = String::new();
215                utf8.push(c);
216                err.span_suggestion(
217                    span,
218                    format!("if you meant to use the UTF-8 encoding of {c:?}, use \\xHH escapes"),
219                    utf8.as_bytes()
220                        .iter()
221                        .map(|b: &u8| format!("\\x{:X}", *b))
222                        .fold("".to_string(), |a, c| a + &c),
223                    Applicability::MaybeIncorrect,
224                );
225            }
226            err.emit()
227        }
228        EscapeError::OutOfRangeHexEscape => {
229            dcx.emit_err(UnescapeError::OutOfRangeHexEscape(err_span))
230        }
231        EscapeError::LeadingUnderscoreUnicodeEscape => {
232            let (c, span) = last_char();
233            dcx.emit_err(UnescapeError::LeadingUnderscoreUnicodeEscape {
234                span,
235                ch: escaped_char(c),
236            })
237        }
238        EscapeError::OverlongUnicodeEscape => {
239            dcx.emit_err(UnescapeError::OverlongUnicodeEscape(err_span))
240        }
241        EscapeError::UnclosedUnicodeEscape => {
242            dcx.emit_err(UnescapeError::UnclosedUnicodeEscape(err_span, err_span.shrink_to_hi()))
243        }
244        EscapeError::NoBraceInUnicodeEscape => {
245            let mut suggestion = "\\u{".to_owned();
246            let mut suggestion_len = 0;
247            let (c, char_span) = last_char();
248            let chars = once(c).chain(lit[range.end..].chars());
249            for c in chars.take(6).take_while(|c| c.is_digit(16)) {
250                suggestion.push(c);
251                suggestion_len += c.len_utf8();
252            }
253
254            let (label, sub) = if suggestion_len > 0 {
255                suggestion.push('}');
256                let hi = char_span.lo() + BytePos(suggestion_len as u32);
257                (None, NoBraceUnicodeSub::Suggestion { span: err_span.with_hi(hi), suggestion })
258            } else {
259                (Some(err_span), NoBraceUnicodeSub::Help)
260            };
261            dcx.emit_err(UnescapeError::NoBraceInUnicodeEscape { span: err_span, label, sub })
262        }
263        EscapeError::UnicodeEscapeInByte => {
264            dcx.emit_err(UnescapeError::UnicodeEscapeInByte(err_span))
265        }
266        EscapeError::EmptyUnicodeEscape => {
267            dcx.emit_err(UnescapeError::EmptyUnicodeEscape(err_span))
268        }
269        EscapeError::ZeroChars => dcx.emit_err(UnescapeError::ZeroChars(err_span)),
270        EscapeError::LoneSlash => dcx.emit_err(UnescapeError::LoneSlash(err_span)),
271        EscapeError::NulInCStr => dcx.emit_err(UnescapeError::NulInCStr { span: err_span }),
272        EscapeError::UnskippedWhitespaceWarning => {
273            let (c, char_span) = last_char();
274            dcx.emit_warn(UnescapeError::UnskippedWhitespace {
275                span: err_span,
276                ch: escaped_char(c),
277                char_span,
278            });
279            return None;
280        }
281        EscapeError::MultipleSkippedLinesWarning => {
282            dcx.emit_warn(UnescapeError::MultipleSkippedLinesWarning(err_span));
283            return None;
284        }
285    })
286}
287
/// Renders `c` as text suitable for embedding in a diagnostic message.
///
/// Characters in the range U+0020 through U+007E are returned verbatim, so `\`, `'` and `"`
/// are deliberately left unescaped in user-facing messages. Every other character is rendered
/// through [`char::escape_default`].
pub(crate) fn escaped_char(c: char) -> String {
    if matches!(c, ' '..='~') {
        return c.to_string();
    }
    c.escape_default().collect()
}