1use std::iter::once;
4use std::ops::Range;
5
6use rustc_errors::{Applicability, DiagCtxtHandle, ErrorGuaranteed};
7use rustc_lexer::unescape::{EscapeError, Mode};
8use rustc_span::{BytePos, Span};
9use tracing::debug;
10
11use crate::errors::{MoreThanOneCharNote, MoreThanOneCharSugg, NoBraceUnicodeSub, UnescapeError};
12
13pub(crate) fn emit_unescape_error(
14 dcx: DiagCtxtHandle<'_>,
15 lit: &str,
17 full_lit_span: Span,
19 err_span: Span,
21 mode: Mode,
22 range: Range<usize>,
24 error: EscapeError,
25) -> Option<ErrorGuaranteed> {
26 debug!(
27 "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
28 lit, full_lit_span, mode, range, error
29 );
30 let last_char = || {
31 let c = lit[range.clone()].chars().next_back().unwrap();
32 let span = err_span.with_lo(err_span.hi() - BytePos(c.len_utf8() as u32));
33 (c, span)
34 };
35 Some(match error {
36 EscapeError::LoneSurrogateUnicodeEscape => {
37 dcx.emit_err(UnescapeError::InvalidUnicodeEscape { span: err_span, surrogate: true })
38 }
39 EscapeError::OutOfRangeUnicodeEscape => {
40 dcx.emit_err(UnescapeError::InvalidUnicodeEscape { span: err_span, surrogate: false })
41 }
42 EscapeError::MoreThanOneChar => {
43 use unicode_normalization::UnicodeNormalization;
44 use unicode_normalization::char::is_combining_mark;
45 let mut sugg = None;
46 let mut note = None;
47
48 let lit_chars = lit.chars().collect::<Vec<_>>();
49 let (first, rest) = lit_chars.split_first().unwrap();
50 if rest.iter().copied().all(is_combining_mark) {
51 let normalized = lit.nfc().to_string();
52 if normalized.chars().count() == 1 {
53 let ch = normalized.chars().next().unwrap().escape_default().to_string();
54 sugg = Some(MoreThanOneCharSugg::NormalizedForm {
55 span: err_span,
56 ch,
57 normalized,
58 });
59 }
60 let escaped_marks =
61 rest.iter().map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
62 note = Some(MoreThanOneCharNote::AllCombining {
63 span: err_span,
64 chr: format!("{first}"),
65 len: escaped_marks.len(),
66 escaped_marks: escaped_marks.join(""),
67 });
68 } else {
69 let printable: Vec<char> = lit
70 .chars()
71 .filter(|&x| {
72 unicode_width::UnicodeWidthChar::width(x).unwrap_or(0) != 0
73 && !x.is_whitespace()
74 })
75 .collect();
76
77 if let &[ch] = printable.as_slice() {
78 sugg = Some(MoreThanOneCharSugg::RemoveNonPrinting {
79 span: err_span,
80 ch: ch.to_string(),
81 });
82 note = Some(MoreThanOneCharNote::NonPrinting {
83 span: err_span,
84 escaped: lit.escape_default().to_string(),
85 });
86 }
87 };
88 let sugg = sugg.unwrap_or_else(|| {
89 let prefix = mode.prefix_noraw();
90 let mut escaped = String::with_capacity(lit.len());
91 let mut in_escape = false;
92 for c in lit.chars() {
93 match c {
94 '\\' => in_escape = !in_escape,
95 '"' if !in_escape => escaped.push('\\'),
96 _ => in_escape = false,
97 }
98 escaped.push(c);
99 }
100 if escaped.len() != lit.len() || full_lit_span.is_empty() {
101 let sugg = format!("{prefix}\"{escaped}\"");
102 MoreThanOneCharSugg::QuotesFull {
103 span: full_lit_span,
104 is_byte: mode == Mode::Byte,
105 sugg,
106 }
107 } else {
108 MoreThanOneCharSugg::Quotes {
109 start: full_lit_span
110 .with_hi(full_lit_span.lo() + BytePos((prefix.len() + 1) as u32)),
111 end: full_lit_span.with_lo(full_lit_span.hi() - BytePos(1)),
112 is_byte: mode == Mode::Byte,
113 prefix,
114 }
115 }
116 });
117 dcx.emit_err(UnescapeError::MoreThanOneChar {
118 span: full_lit_span,
119 note,
120 suggestion: sugg,
121 })
122 }
123 EscapeError::EscapeOnlyChar => {
124 let (c, char_span) = last_char();
125 dcx.emit_err(UnescapeError::EscapeOnlyChar {
126 span: err_span,
127 char_span,
128 escaped_sugg: c.escape_default().to_string(),
129 escaped_msg: escaped_char(c),
130 byte: mode == Mode::Byte,
131 })
132 }
133 EscapeError::BareCarriageReturn => {
134 let double_quotes = mode.in_double_quotes();
135 dcx.emit_err(UnescapeError::BareCr { span: err_span, double_quotes })
136 }
137 EscapeError::BareCarriageReturnInRawString => {
138 assert!(mode.in_double_quotes());
139 dcx.emit_err(UnescapeError::BareCrRawString(err_span))
140 }
141 EscapeError::InvalidEscape => {
142 let (c, span) = last_char();
143
144 let label = if mode == Mode::Byte || mode == Mode::ByteStr {
145 "unknown byte escape"
146 } else {
147 "unknown character escape"
148 };
149 let ec = escaped_char(c);
150 let mut diag = dcx.struct_span_err(span, format!("{label}: `{ec}`"));
151 diag.span_label(span, label);
152 if c == '{' || c == '}' && matches!(mode, Mode::Str | Mode::RawStr) {
153 diag.help(
154 "if used in a formatting string, curly braces are escaped with `{{` and `}}`",
155 );
156 } else if c == '\r' {
157 diag.help(
158 "this is an isolated carriage return; consider checking your editor and \
159 version control settings",
160 );
161 } else {
162 if mode == Mode::Str || mode == Mode::Char {
163 diag.span_suggestion(
164 full_lit_span,
165 "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal",
166 format!("r\"{lit}\""),
167 Applicability::MaybeIncorrect,
168 );
169 }
170
171 diag.help(
172 "for more information, visit \
173 <https://doc.rust-lang.org/reference/tokens.html#literals>",
174 );
175 }
176 diag.emit()
177 }
178 EscapeError::TooShortHexEscape => dcx.emit_err(UnescapeError::TooShortHexEscape(err_span)),
179 EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
180 let (c, span) = last_char();
181 let is_hex = error == EscapeError::InvalidCharInHexEscape;
182 let ch = escaped_char(c);
183 dcx.emit_err(UnescapeError::InvalidCharInEscape { span, is_hex, ch })
184 }
185 EscapeError::NonAsciiCharInByte => {
186 let (c, span) = last_char();
187 let desc = match mode {
188 Mode::Byte => "byte literal",
189 Mode::ByteStr => "byte string literal",
190 Mode::RawByteStr => "raw byte string literal",
191 _ => panic!("non-is_byte literal paired with NonAsciiCharInByte"),
192 };
193 let mut err = dcx.struct_span_err(span, format!("non-ASCII character in {desc}"));
194 let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
195 format!(" but is {c:?}")
196 } else {
197 String::new()
198 };
199 err.span_label(span, format!("must be ASCII{postfix}"));
200 if (c as u32) <= 0xFF && mode != Mode::RawByteStr {
203 err.span_suggestion(
204 span,
205 format!(
206 "if you meant to use the unicode code point for {c:?}, use a \\xHH escape"
207 ),
208 format!("\\x{:X}", c as u32),
209 Applicability::MaybeIncorrect,
210 );
211 } else if mode == Mode::Byte {
212 err.span_label(span, "this multibyte character does not fit into a single byte");
213 } else if mode != Mode::RawByteStr {
214 let mut utf8 = String::new();
215 utf8.push(c);
216 err.span_suggestion(
217 span,
218 format!("if you meant to use the UTF-8 encoding of {c:?}, use \\xHH escapes"),
219 utf8.as_bytes()
220 .iter()
221 .map(|b: &u8| format!("\\x{:X}", *b))
222 .fold("".to_string(), |a, c| a + &c),
223 Applicability::MaybeIncorrect,
224 );
225 }
226 err.emit()
227 }
228 EscapeError::OutOfRangeHexEscape => {
229 dcx.emit_err(UnescapeError::OutOfRangeHexEscape(err_span))
230 }
231 EscapeError::LeadingUnderscoreUnicodeEscape => {
232 let (c, span) = last_char();
233 dcx.emit_err(UnescapeError::LeadingUnderscoreUnicodeEscape {
234 span,
235 ch: escaped_char(c),
236 })
237 }
238 EscapeError::OverlongUnicodeEscape => {
239 dcx.emit_err(UnescapeError::OverlongUnicodeEscape(err_span))
240 }
241 EscapeError::UnclosedUnicodeEscape => {
242 dcx.emit_err(UnescapeError::UnclosedUnicodeEscape(err_span, err_span.shrink_to_hi()))
243 }
244 EscapeError::NoBraceInUnicodeEscape => {
245 let mut suggestion = "\\u{".to_owned();
246 let mut suggestion_len = 0;
247 let (c, char_span) = last_char();
248 let chars = once(c).chain(lit[range.end..].chars());
249 for c in chars.take(6).take_while(|c| c.is_digit(16)) {
250 suggestion.push(c);
251 suggestion_len += c.len_utf8();
252 }
253
254 let (label, sub) = if suggestion_len > 0 {
255 suggestion.push('}');
256 let hi = char_span.lo() + BytePos(suggestion_len as u32);
257 (None, NoBraceUnicodeSub::Suggestion { span: err_span.with_hi(hi), suggestion })
258 } else {
259 (Some(err_span), NoBraceUnicodeSub::Help)
260 };
261 dcx.emit_err(UnescapeError::NoBraceInUnicodeEscape { span: err_span, label, sub })
262 }
263 EscapeError::UnicodeEscapeInByte => {
264 dcx.emit_err(UnescapeError::UnicodeEscapeInByte(err_span))
265 }
266 EscapeError::EmptyUnicodeEscape => {
267 dcx.emit_err(UnescapeError::EmptyUnicodeEscape(err_span))
268 }
269 EscapeError::ZeroChars => dcx.emit_err(UnescapeError::ZeroChars(err_span)),
270 EscapeError::LoneSlash => dcx.emit_err(UnescapeError::LoneSlash(err_span)),
271 EscapeError::NulInCStr => dcx.emit_err(UnescapeError::NulInCStr { span: err_span }),
272 EscapeError::UnskippedWhitespaceWarning => {
273 let (c, char_span) = last_char();
274 dcx.emit_warn(UnescapeError::UnskippedWhitespace {
275 span: err_span,
276 ch: escaped_char(c),
277 char_span,
278 });
279 return None;
280 }
281 EscapeError::MultipleSkippedLinesWarning => {
282 dcx.emit_warn(UnescapeError::MultipleSkippedLinesWarning(err_span));
283 return None;
284 }
285 })
286}
287
/// Renders `c` for inclusion in a diagnostic message.
///
/// Printable ASCII (U+0020 through U+007E) is returned verbatim, so `\`, `'` and `"` are
/// left unescaped. Every other character is rendered through [`char::escape_default`].
pub(crate) fn escaped_char(c: char) -> String {
    if matches!(c, ' '..='~') {
        return String::from(c);
    }
    c.escape_default().collect()
}