1use std::ops::Range;
5use std::str::Chars;
6
7use Mode::*;
8
9#[cfg(test)]
10mod tests;
11
/// Errors and warnings that can be reported while unescaping a literal.
///
/// The two `*Warning` variants are non-fatal (see [`EscapeError::is_fatal`]);
/// every other variant is a hard error.
#[derive(Debug, PartialEq, Eq)]
pub enum EscapeError {
    /// A single char/byte was expected, but the input was empty.
    ZeroChars,
    /// A single char/byte was expected, but more input followed it.
    MoreThanOneChar,

    /// A backslash with nothing after it.
    LoneSlash,
    /// A backslash followed by a character that does not start a known escape.
    InvalidEscape,
    /// An unescaped `\r` in a non-raw literal.
    BareCarriageReturn,
    /// A `\r` in a raw literal.
    BareCarriageReturnInRawString,
    /// A character that must be written as an escape appeared unescaped
    /// (`\n`, `\t` or `'` in a char/byte literal, `"` in a string literal).
    EscapeOnlyChar,

    /// A `\x` escape with fewer than two characters after it.
    TooShortHexEscape,
    /// A non-hexadecimal character inside a `\x` escape.
    InvalidCharInHexEscape,
    /// A `\x` escape whose value is not ASCII, in a mode that does not allow
    /// high bytes.
    OutOfRangeHexEscape,

    /// `\u` not followed by `{`.
    NoBraceInUnicodeEscape,
    /// A non-hexadecimal character inside `\u{..}`.
    InvalidCharInUnicodeEscape,
    /// `\u{}`: no digits at all.
    EmptyUnicodeEscape,
    /// `\u{..` without the closing `}`.
    UnclosedUnicodeEscape,
    /// `\u{_..}`: the first character after `{` is an underscore.
    LeadingUnderscoreUnicodeEscape,
    /// More than six hexadecimal digits inside `\u{..}`.
    OverlongUnicodeEscape,
    /// `\u{..}` whose value is at most `0x10FFFF` but is not a valid `char`.
    LoneSurrogateUnicodeEscape,
    /// `\u{..}` whose value is greater than `0x10FFFF`.
    OutOfRangeUnicodeEscape,

    /// A syntactically valid `\u{..}` escape in a mode that does not allow
    /// unicode escapes.
    UnicodeEscapeInByte,
    /// A non-ASCII character in a mode that does not allow unicode chars.
    NonAsciiCharInByte,

    /// A NUL character in a C string literal.
    NulInCStr,

    /// Warning: after a backslash-newline continuation, the whitespace
    /// skipping stopped at a character that is itself whitespace.
    UnskippedWhitespaceWarning,

    /// Warning: a backslash-newline continuation skipped more than one line.
    MultipleSkippedLinesWarning,
}

impl EscapeError {
    /// Returns `true` for hard errors and `false` for the warning variants.
    pub fn is_fatal(&self) -> bool {
        let is_warning = matches!(
            self,
            Self::UnskippedWhitespaceWarning | Self::MultipleSkippedLinesWarning
        );
        !is_warning
    }
}
82
83pub fn unescape_unicode<F>(src: &str, mode: Mode, callback: &mut F)
89where
90 F: FnMut(Range<usize>, Result<char, EscapeError>),
91{
92 match mode {
93 Char | Byte => {
94 let mut chars = src.chars();
95 let res = unescape_char_or_byte(&mut chars, mode);
96 callback(0..(src.len() - chars.as_str().len()), res);
97 }
98 Str | ByteStr => unescape_non_raw_common(src, mode, callback),
99 RawStr | RawByteStr => check_raw_common(src, mode, callback),
100 RawCStr => check_raw_common(src, mode, &mut |r, mut result| {
101 if let Ok('\0') = result {
102 result = Err(EscapeError::NulInCStr);
103 }
104 callback(r, result)
105 }),
106 CStr => unreachable!(),
107 }
108}
109
/// One unit of a literal that may mix Unicode characters with raw high bytes.
///
/// Values are built through the `From<char>` and `From<u8>` impls below.
/// The type derives `Debug`, `Clone`, `Copy`, `PartialEq` and `Eq` so that
/// callers can print, copy and compare the units they receive.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MixedUnit {
    /// A character: any `char`, or an ASCII byte converted with `From<u8>`.
    Char(char),

    /// A non-ASCII byte (`0x80..=0xff`) converted with `From<u8>`.
    HighByte(u8),
}

impl From<char> for MixedUnit {
    /// Wraps the character unchanged.
    fn from(c: char) -> Self {
        MixedUnit::Char(c)
    }
}

impl From<u8> for MixedUnit {
    /// ASCII bytes become [`MixedUnit::Char`]; all other bytes become
    /// [`MixedUnit::HighByte`].
    fn from(n: u8) -> Self {
        if n.is_ascii() {
            MixedUnit::Char(char::from(n))
        } else {
            MixedUnit::HighByte(n)
        }
    }
}
140
141pub fn unescape_mixed<F>(src: &str, mode: Mode, callback: &mut F)
146where
147 F: FnMut(Range<usize>, Result<MixedUnit, EscapeError>),
148{
149 match mode {
150 CStr => unescape_non_raw_common(src, mode, &mut |r, mut result| {
151 if let Ok(MixedUnit::Char('\0')) = result {
152 result = Err(EscapeError::NulInCStr);
153 }
154 callback(r, result)
155 }),
156 Char | Byte | Str | RawStr | ByteStr | RawByteStr | RawCStr => unreachable!(),
157 }
158}
159
160pub fn unescape_char(src: &str) -> Result<char, EscapeError> {
163 unescape_char_or_byte(&mut src.chars(), Char)
164}
165
166pub fn unescape_byte(src: &str) -> Result<u8, EscapeError> {
169 unescape_char_or_byte(&mut src.chars(), Byte).map(byte_from_char)
170}
171
/// The kind of literal being unescaped.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Mode {
    /// Char literal: not double-quoted, no prefix.
    Char,

    /// Byte literal: not double-quoted, `b` prefix.
    Byte,

    /// String literal: double-quoted, no prefix.
    Str,
    /// Raw variant of [`Mode::Str`].
    RawStr,

    /// Byte string literal: double-quoted, `b` prefix.
    ByteStr,
    /// Raw variant of [`Mode::ByteStr`].
    RawByteStr,

    /// C string literal: double-quoted, `c` prefix.
    CStr,
    /// Raw variant of [`Mode::CStr`].
    RawCStr,
}

impl Mode {
    /// Whether literals of this kind are delimited by double quotes.
    pub fn in_double_quotes(self) -> bool {
        match self {
            Self::Char | Self::Byte => false,
            Self::Str
            | Self::RawStr
            | Self::ByteStr
            | Self::RawByteStr
            | Self::CStr
            | Self::RawCStr => true,
        }
    }

    /// Whether `\x` escapes with a non-ASCII value are accepted.
    ///
    /// Must not be called for raw modes (they have no escapes); doing so
    /// panics.
    fn allow_high_bytes(self) -> bool {
        match self {
            Self::Byte | Self::ByteStr | Self::CStr => true,
            Self::Char | Self::Str => false,
            Self::RawStr | Self::RawByteStr | Self::RawCStr => unreachable!(),
        }
    }

    /// Whether non-ASCII characters may appear directly in the literal.
    #[inline]
    fn allow_unicode_chars(self) -> bool {
        match self {
            Self::Char | Self::Str | Self::RawStr | Self::CStr | Self::RawCStr => true,
            Self::Byte | Self::ByteStr | Self::RawByteStr => false,
        }
    }

    /// Whether `\u{..}` escapes are accepted.
    ///
    /// Must not be called for raw modes (they have no escapes); doing so
    /// panics.
    fn allow_unicode_escapes(self) -> bool {
        match self {
            Self::Char | Self::Str | Self::CStr => true,
            Self::Byte | Self::ByteStr => false,
            Self::RawByteStr | Self::RawStr | Self::RawCStr => unreachable!(),
        }
    }

    /// The literal prefix for this kind, ignoring any raw marker:
    /// `""`, `"b"` or `"c"`.
    pub fn prefix_noraw(self) -> &'static str {
        match self {
            Self::CStr | Self::RawCStr => "c",
            Self::Byte | Self::ByteStr | Self::RawByteStr => "b",
            Self::Char | Self::Str | Self::RawStr => "",
        }
    }
}
232
233fn scan_escape<T: From<char> + From<u8>>(
234 chars: &mut Chars<'_>,
235 mode: Mode,
236) -> Result<T, EscapeError> {
237 let res: char = match chars.next().ok_or(EscapeError::LoneSlash)? {
239 '"' => '"',
240 'n' => '\n',
241 'r' => '\r',
242 't' => '\t',
243 '\\' => '\\',
244 '\'' => '\'',
245 '0' => '\0',
246 'x' => {
247 let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
250 let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
251
252 let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
253 let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
254
255 let value = (hi * 16 + lo) as u8;
256
257 return if !mode.allow_high_bytes() && !value.is_ascii() {
258 Err(EscapeError::OutOfRangeHexEscape)
259 } else {
260 Ok(T::from(value))
263 };
264 }
265 'u' => return scan_unicode(chars, mode.allow_unicode_escapes()).map(T::from),
266 _ => return Err(EscapeError::InvalidEscape),
267 };
268 Ok(T::from(res))
269}
270
271fn scan_unicode(chars: &mut Chars<'_>, allow_unicode_escapes: bool) -> Result<char, EscapeError> {
272 if chars.next() != Some('{') {
275 return Err(EscapeError::NoBraceInUnicodeEscape);
276 }
277
278 let mut n_digits = 1;
280 let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
281 '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
282 '}' => return Err(EscapeError::EmptyUnicodeEscape),
283 c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
284 };
285
286 loop {
289 match chars.next() {
290 None => return Err(EscapeError::UnclosedUnicodeEscape),
291 Some('_') => continue,
292 Some('}') => {
293 if n_digits > 6 {
294 return Err(EscapeError::OverlongUnicodeEscape);
295 }
296
297 if !allow_unicode_escapes {
300 return Err(EscapeError::UnicodeEscapeInByte);
301 }
302
303 break std::char::from_u32(value).ok_or({
304 if value > 0x10FFFF {
305 EscapeError::OutOfRangeUnicodeEscape
306 } else {
307 EscapeError::LoneSurrogateUnicodeEscape
308 }
309 });
310 }
311 Some(c) => {
312 let digit: u32 = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
313 n_digits += 1;
314 if n_digits > 6 {
315 continue;
317 }
318 value = value * 16 + digit;
319 }
320 };
321 }
322}
323
324#[inline]
325fn ascii_check(c: char, allow_unicode_chars: bool) -> Result<char, EscapeError> {
326 if allow_unicode_chars || c.is_ascii() { Ok(c) } else { Err(EscapeError::NonAsciiCharInByte) }
327}
328
329fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
330 let c = chars.next().ok_or(EscapeError::ZeroChars)?;
331 let res = match c {
332 '\\' => scan_escape(chars, mode),
333 '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar),
334 '\r' => Err(EscapeError::BareCarriageReturn),
335 _ => ascii_check(c, mode.allow_unicode_chars()),
336 }?;
337 if chars.next().is_some() {
338 return Err(EscapeError::MoreThanOneChar);
339 }
340 Ok(res)
341}
342
343fn unescape_non_raw_common<F, T: From<char> + From<u8>>(src: &str, mode: Mode, callback: &mut F)
346where
347 F: FnMut(Range<usize>, Result<T, EscapeError>),
348{
349 let mut chars = src.chars();
350 let allow_unicode_chars = mode.allow_unicode_chars(); while let Some(c) = chars.next() {
356 let start = src.len() - chars.as_str().len() - c.len_utf8();
357 let res = match c {
358 '\\' => {
359 match chars.clone().next() {
360 Some('\n') => {
361 skip_ascii_whitespace(&mut chars, start, &mut |range, err| {
366 callback(range, Err(err))
367 });
368 continue;
369 }
370 _ => scan_escape::<T>(&mut chars, mode),
371 }
372 }
373 '"' => Err(EscapeError::EscapeOnlyChar),
374 '\r' => Err(EscapeError::BareCarriageReturn),
375 _ => ascii_check(c, allow_unicode_chars).map(T::from),
376 };
377 let end = src.len() - chars.as_str().len();
378 callback(start..end, res);
379 }
380}
381
382fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
383where
384 F: FnMut(Range<usize>, EscapeError),
385{
386 let tail = chars.as_str();
387 let first_non_space = tail
388 .bytes()
389 .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
390 .unwrap_or(tail.len());
391 if tail[1..first_non_space].contains('\n') {
392 let end = start + first_non_space + 1;
394 callback(start..end, EscapeError::MultipleSkippedLinesWarning);
395 }
396 let tail = &tail[first_non_space..];
397 if let Some(c) = tail.chars().next() {
398 if c.is_whitespace() {
399 let end = start + first_non_space + c.len_utf8() + 1;
402 callback(start..end, EscapeError::UnskippedWhitespaceWarning);
403 }
404 }
405 *chars = tail.chars();
406}
407
408fn check_raw_common<F>(src: &str, mode: Mode, callback: &mut F)
413where
414 F: FnMut(Range<usize>, Result<char, EscapeError>),
415{
416 let mut chars = src.chars();
417 let allow_unicode_chars = mode.allow_unicode_chars(); while let Some(c) = chars.next() {
423 let start = src.len() - chars.as_str().len() - c.len_utf8();
424 let res = match c {
425 '\r' => Err(EscapeError::BareCarriageReturnInRawString),
426 _ => ascii_check(c, allow_unicode_chars),
427 };
428 let end = src.len() - chars.as_str().len();
429 callback(start..end, res);
430 }
431}
432
/// Converts a `char` whose code point fits in one byte into that byte.
///
/// In debug builds this asserts that the code point is at most `u8::MAX`;
/// in release builds a larger code point is truncated to its low 8 bits.
#[inline]
pub fn byte_from_char(c: char) -> u8 {
    let code_point = u32::from(c);
    debug_assert!(code_point <= u32::from(u8::MAX), "guaranteed because of ByteStr");
    code_point as u8
}