// compiler/rustc_parse/src/lexer/mod.rs
use diagnostics::make_errors_for_mismatched_closing_delims;
use rustc_ast::ast::{self, AttrStyle};
use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
use rustc_ast::tokenstream::TokenStream;
use rustc_ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_control_chars};
use rustc_errors::codes::*;
use rustc_errors::{Applicability, Diag, DiagCtxtHandle, Diagnostic, StashKey};
use rustc_lexer::{
    Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_horizontal_whitespace,
};
use rustc_literal_escaper::{EscapeError, Mode, check_for_errors};
use rustc_session::lint::builtin::{
    RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
    TEXT_DIRECTION_CODEPOINT_IN_COMMENT, TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
};
use rustc_session::parse::ParseSess;
use rustc_span::{BytePos, Pos, Span, Symbol, sym};
use tracing::debug;

use crate::errors;
use crate::lexer::diagnostics::TokenTreeDiagInfo;
use crate::lexer::unicode_chars::UNICODE_ARRAY;

mod diagnostics;
mod tokentrees;
mod unescape_error_reporting;
mod unicode_chars;

use unescape_error_reporting::{emit_unescape_error, escaped_char};
30
31// This type is used a lot. Make sure it doesn't unintentionally get bigger.
32//
33// This assertion is in this crate, rather than in `rustc_lexer`, because that
34// crate cannot depend on `rustc_data_structures`.
35#[cfg(target_pointer_width = "64")]
36const _: [(); 12] = [(); ::std::mem::size_of::<rustc_lexer::Token>()];rustc_data_structures::static_assert_size!(rustc_lexer::Token, 12);
37
/// Codepoints that render as invisible/zero-width, listed so the
/// "unknown start of token" diagnostic can point them out explicitly.
const INVISIBLE_CHARACTERS: [char; 8] = [
    '\u{200b}', // ZERO WIDTH SPACE
    '\u{200c}', // ZERO WIDTH NON-JOINER
    '\u{2060}', // WORD JOINER
    '\u{2061}', // FUNCTION APPLICATION
    '\u{2062}', // INVISIBLE TIMES
    '\u{00ad}', // SOFT HYPHEN
    '\u{034f}', // COMBINING GRAPHEME JOINER
    '\u{061c}', // ARABIC LETTER MARK
];
41
42#[derive(#[automatically_derived]
impl ::core::clone::Clone for UnmatchedDelim {
    #[inline]
    fn clone(&self) -> UnmatchedDelim {
        UnmatchedDelim {
            found_delim: ::core::clone::Clone::clone(&self.found_delim),
            found_span: ::core::clone::Clone::clone(&self.found_span),
            unclosed_span: ::core::clone::Clone::clone(&self.unclosed_span),
            candidate_span: ::core::clone::Clone::clone(&self.candidate_span),
        }
    }
}Clone, #[automatically_derived]
impl ::core::fmt::Debug for UnmatchedDelim {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_struct_field4_finish(f,
            "UnmatchedDelim", "found_delim", &self.found_delim, "found_span",
            &self.found_span, "unclosed_span", &self.unclosed_span,
            "candidate_span", &&self.candidate_span)
    }
}Debug)]
43pub(crate) struct UnmatchedDelim {
44    pub found_delim: Option<Delimiter>,
45    pub found_span: Span,
46    pub unclosed_span: Option<Span>,
47    pub candidate_span: Option<Span>,
48}
49
/// Which tokens should be stripped before lexing the tokens.
pub enum StripTokens {
    /// Strip both the shebang line and frontmatter.
    ShebangAndFrontmatter,
    /// Strip the shebang but not frontmatter.
    ///
    /// That means that char sequences looking like frontmatter are simply
    /// interpreted as regular Rust lexemes.
    Shebang,
    /// Strip nothing.
    ///
    /// In other words, char sequences looking like a shebang or frontmatter
    /// are simply interpreted as regular Rust lexemes.
    Nothing,
}
65
66pub(crate) fn lex_token_trees<'psess, 'src>(
67    psess: &'psess ParseSess,
68    mut src: &'src str,
69    mut start_pos: BytePos,
70    override_span: Option<Span>,
71    strip_tokens: StripTokens,
72) -> Result<TokenStream, Vec<Diag<'psess>>> {
73    match strip_tokens {
74        StripTokens::Shebang | StripTokens::ShebangAndFrontmatter => {
75            if let Some(shebang_len) = rustc_lexer::strip_shebang(src) {
76                src = &src[shebang_len..];
77                start_pos = start_pos + BytePos::from_usize(shebang_len);
78            }
79        }
80        StripTokens::Nothing => {}
81    }
82
83    let frontmatter_allowed = match strip_tokens {
84        StripTokens::ShebangAndFrontmatter => FrontmatterAllowed::Yes,
85        StripTokens::Shebang | StripTokens::Nothing => FrontmatterAllowed::No,
86    };
87
88    let cursor = Cursor::new(src, frontmatter_allowed);
89    let mut lexer = Lexer {
90        psess,
91        start_pos,
92        pos: start_pos,
93        src,
94        cursor,
95        override_span,
96        nbsp_is_whitespace: false,
97        last_lifetime: None,
98        token: Token::dummy(),
99        diag_info: TokenTreeDiagInfo::default(),
100    };
101    let res = lexer.lex_token_trees(/* is_delimited */ false);
102
103    let mut unmatched_closing_delims: Vec<_> =
104        make_errors_for_mismatched_closing_delims(&lexer.diag_info.unmatched_delims, psess);
105
106    match res {
107        Ok((_open_spacing, stream)) => {
108            if unmatched_closing_delims.is_empty() {
109                Ok(stream)
110            } else {
111                // Return error if there are unmatched delimiters or unclosed delimiters.
112                Err(unmatched_closing_delims)
113            }
114        }
115        Err(errs) => {
116            // We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch
117            // because the delimiter mismatch is more likely to be the root cause of error
118            unmatched_closing_delims.extend(errs);
119            Err(unmatched_closing_delims)
120        }
121    }
122}
123
/// State for lexing a single source string into cooked tokens.
struct Lexer<'psess, 'src> {
    psess: &'psess ParseSess,
    /// Initial position, read-only.
    start_pos: BytePos,
    /// The absolute offset within the source_map of the current character.
    pos: BytePos,
    /// Source text to tokenize.
    src: &'src str,
    /// Cursor for getting lexer tokens.
    cursor: Cursor<'src>,
    /// When set, `mk_sp` returns this span instead of constructing one from
    /// the given positions.
    override_span: Option<Span>,
    /// When a "unknown start of token: \u{a0}" has already been emitted earlier
    /// in this file, it's safe to treat further occurrences of the non-breaking
    /// space character as whitespace.
    nbsp_is_whitespace: bool,

    /// Track the `Span` for the leading `'` of the last lifetime. Used for
    /// diagnostics to detect possible typo where `"` was meant.
    last_lifetime: Option<Span>,

    /// The current token.
    token: Token,

    /// Diagnostic state (e.g. unmatched delimiters) shared with token-tree
    /// construction.
    diag_info: TokenTreeDiagInfo,
}
149
150impl<'psess, 'src> Lexer<'psess, 'src> {
    /// Convenience accessor for the parse session's diagnostic context.
    fn dcx(&self) -> DiagCtxtHandle<'psess> {
        self.psess.dcx()
    }
154
155    fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span {
156        self.override_span.unwrap_or_else(|| Span::with_root_ctxt(lo, hi))
157    }
158
159    /// Returns the next token, paired with a bool indicating if the token was
160    /// preceded by whitespace.
161    fn next_token_from_cursor(&mut self) -> (Token, bool) {
162        let mut preceded_by_whitespace = false;
163        let mut swallow_next_invalid = 0;
164        // Skip trivial (whitespace & comments) tokens
165        loop {
166            let str_before = self.cursor.as_str();
167            let token = self.cursor.advance_token();
168            let start = self.pos;
169            self.pos = self.pos + BytePos(token.len);
170
171            {
    use ::tracing::__macro_support::Callsite as _;
    static __CALLSITE: ::tracing::callsite::DefaultCallsite =
        {
            static META: ::tracing::Metadata<'static> =
                {
                    ::tracing_core::metadata::Metadata::new("event compiler/rustc_parse/src/lexer/mod.rs:171",
                        "rustc_parse::lexer", ::tracing::Level::DEBUG,
                        ::tracing_core::__macro_support::Option::Some("compiler/rustc_parse/src/lexer/mod.rs"),
                        ::tracing_core::__macro_support::Option::Some(171u32),
                        ::tracing_core::__macro_support::Option::Some("rustc_parse::lexer"),
                        ::tracing_core::field::FieldSet::new(&["message"],
                            ::tracing_core::callsite::Identifier(&__CALLSITE)),
                        ::tracing::metadata::Kind::EVENT)
                };
            ::tracing::callsite::DefaultCallsite::new(&META)
        };
    let enabled =
        ::tracing::Level::DEBUG <= ::tracing::level_filters::STATIC_MAX_LEVEL
                &&
                ::tracing::Level::DEBUG <=
                    ::tracing::level_filters::LevelFilter::current() &&
            {
                let interest = __CALLSITE.interest();
                !interest.is_never() &&
                    ::tracing::__macro_support::__is_enabled(__CALLSITE.metadata(),
                        interest)
            };
    if enabled {
        (|value_set: ::tracing::field::ValueSet|
                    {
                        let meta = __CALLSITE.metadata();
                        ::tracing::Event::dispatch(meta, &value_set);
                        ;
                    })({
                #[allow(unused_imports)]
                use ::tracing::field::{debug, display, Value};
                let mut iter = __CALLSITE.metadata().fields().iter();
                __CALLSITE.metadata().fields().value_set(&[(&::tracing::__macro_support::Iterator::next(&mut iter).expect("FieldSet corrupted (this is a bug)"),
                                    ::tracing::__macro_support::Option::Some(&format_args!("next_token: {0:?}({1:?})",
                                                    token.kind, self.str_from(start)) as &dyn Value))])
            });
    } else { ; }
};debug!("next_token: {:?}({:?})", token.kind, self.str_from(start));
172
173            if let rustc_lexer::TokenKind::Semi
174            | rustc_lexer::TokenKind::LineComment { .. }
175            | rustc_lexer::TokenKind::BlockComment { .. }
176            | rustc_lexer::TokenKind::CloseParen
177            | rustc_lexer::TokenKind::CloseBrace
178            | rustc_lexer::TokenKind::CloseBracket = token.kind
179            {
180                // Heuristic: we assume that it is unlikely we're dealing with an unterminated
181                // string surrounded by single quotes.
182                self.last_lifetime = None;
183            }
184
185            // Now "cook" the token, converting the simple `rustc_lexer::TokenKind` enum into a
186            // rich `rustc_ast::TokenKind`. This turns strings into interned symbols and runs
187            // additional validation.
188            let kind = match token.kind {
189                rustc_lexer::TokenKind::LineComment { doc_style } => {
190                    // Skip non-doc comments
191                    let Some(doc_style) = doc_style else {
192                        self.lint_unicode_text_flow(start);
193                        preceded_by_whitespace = true;
194                        continue;
195                    };
196
197                    // Opening delimiter of the length 3 is not included into the symbol.
198                    let content_start = start + BytePos(3);
199                    let content = self.str_from(content_start);
200                    self.lint_doc_comment_unicode_text_flow(start, content);
201                    self.cook_doc_comment(content_start, content, CommentKind::Line, doc_style)
202                }
203                rustc_lexer::TokenKind::BlockComment { doc_style, terminated } => {
204                    if !terminated {
205                        self.report_unterminated_block_comment(start, doc_style);
206                    }
207
208                    // Skip non-doc comments
209                    let Some(doc_style) = doc_style else {
210                        self.lint_unicode_text_flow(start);
211                        preceded_by_whitespace = true;
212                        continue;
213                    };
214
215                    // Opening delimiter of the length 3 and closing delimiter of the length 2
216                    // are not included into the symbol.
217                    let content_start = start + BytePos(3);
218                    let content_end = self.pos - BytePos(if terminated { 2 } else { 0 });
219                    let content = self.str_from_to(content_start, content_end);
220                    self.lint_doc_comment_unicode_text_flow(start, content);
221                    self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
222                }
223                rustc_lexer::TokenKind::Frontmatter { has_invalid_preceding_whitespace, invalid_infostring } => {
224                    self.validate_frontmatter(start, has_invalid_preceding_whitespace, invalid_infostring);
225                    preceded_by_whitespace = true;
226                    continue;
227                }
228                rustc_lexer::TokenKind::Whitespace => {
229                    preceded_by_whitespace = true;
230                    continue;
231                }
232                rustc_lexer::TokenKind::Ident => self.ident(start),
233                rustc_lexer::TokenKind::RawIdent => {
234                    let sym = nfc_normalize(self.str_from(start + BytePos(2)));
235                    let span = self.mk_sp(start, self.pos);
236                    self.psess.symbol_gallery.insert(sym, span);
237                    if !sym.can_be_raw() {
238                        self.dcx().emit_err(errors::CannotBeRawIdent { span, ident: sym });
239                    }
240                    self.psess.raw_identifier_spans.push(span);
241                    token::Ident(sym, IdentIsRaw::Yes)
242                }
243                rustc_lexer::TokenKind::UnknownPrefix => {
244                    self.report_unknown_prefix(start);
245                    self.ident(start)
246                }
247                rustc_lexer::TokenKind::UnknownPrefixLifetime => {
248                    self.report_unknown_prefix(start);
249                    // Include the leading `'` in the real identifier, for macro
250                    // expansion purposes. See #12512 for the gory details of why
251                    // this is necessary.
252                    let lifetime_name = self.str_from(start);
253                    self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1)));
254                    let ident = Symbol::intern(lifetime_name);
255                    token::Lifetime(ident, IdentIsRaw::No)
256                }
257                rustc_lexer::TokenKind::InvalidIdent
258                    // Do not recover an identifier with emoji if the codepoint is a confusable
259                    // with a recoverable substitution token, like `➖`.
260                    if !UNICODE_ARRAY.iter().any(|&(c, _, _)| {
261                        let sym = self.str_from(start);
262                        sym.chars().count() == 1 && c == sym.chars().next().unwrap()
263                    }) =>
264                {
265                    let sym = nfc_normalize(self.str_from(start));
266                    let span = self.mk_sp(start, self.pos);
267                    self.psess
268                        .bad_unicode_identifiers
269                        .borrow_mut()
270                        .entry(sym)
271                        .or_default()
272                        .push(span);
273                    token::Ident(sym, IdentIsRaw::No)
274                }
275                // split up (raw) c string literals to an ident and a string literal when edition <
276                // 2021.
277                rustc_lexer::TokenKind::Literal {
278                    kind: kind @ (LiteralKind::CStr { .. } | LiteralKind::RawCStr { .. }),
279                    suffix_start: _,
280                } if !self.mk_sp(start, self.pos).edition().at_least_rust_2021() => {
281                    let prefix_len = match kind {
282                        LiteralKind::CStr { .. } => 1,
283                        LiteralKind::RawCStr { .. } => 2,
284                        _ => ::core::panicking::panic("internal error: entered unreachable code")unreachable!(),
285                    };
286
287                    // reset the state so that only the prefix ("c" or "cr")
288                    // was consumed.
289                    let lit_start = start + BytePos(prefix_len);
290                    self.pos = lit_start;
291                    self.cursor = Cursor::new(&str_before[prefix_len as usize..], FrontmatterAllowed::No);
292                    self.report_unknown_prefix(start);
293                    let prefix_span = self.mk_sp(start, lit_start);
294                    return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace);
295                }
296                rustc_lexer::TokenKind::GuardedStrPrefix => {
297                    self.maybe_report_guarded_str(start, str_before)
298                }
299                rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
300                    let suffix_start = start + BytePos(suffix_start);
301                    let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
302                    let suffix = if suffix_start < self.pos {
303                        let string = self.str_from(suffix_start);
304                        if string == "_" {
305                            self.dcx().emit_err(errors::UnderscoreLiteralSuffix {
306                                span: self.mk_sp(suffix_start, self.pos),
307                            });
308                            None
309                        } else {
310                            Some(Symbol::intern(string))
311                        }
312                    } else {
313                        None
314                    };
315                    self.lint_literal_unicode_text_flow(symbol, kind, self.mk_sp(start, self.pos), "literal");
316                    token::Literal(token::Lit { kind, symbol, suffix })
317                }
318                rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
319                    // Include the leading `'` in the real identifier, for macro
320                    // expansion purposes. See #12512 for the gory details of why
321                    // this is necessary.
322                    let lifetime_name = nfc_normalize(self.str_from(start));
323                    self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1)));
324                    if starts_with_number {
325                        let span = self.mk_sp(start, self.pos);
326                        self.dcx()
327                            .struct_err("lifetimes cannot start with a number")
328                            .with_span(span)
329                            .stash(span, StashKey::LifetimeIsChar);
330                    }
331                    token::Lifetime(lifetime_name, IdentIsRaw::No)
332                }
333                rustc_lexer::TokenKind::RawLifetime => {
334                    self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1)));
335
336                    let ident_start = start + BytePos(3);
337                    let prefix_span = self.mk_sp(start, ident_start);
338
339                    if prefix_span.at_least_rust_2021() {
340                        // If the raw lifetime is followed by \' then treat it a normal
341                        // lifetime followed by a \', which is to interpret it as a character
342                        // literal. In this case, it's always an invalid character literal
343                        // since the literal must necessarily have >3 characters (r#...) inside
344                        // of it, which is invalid.
345                        if self.cursor.as_str().starts_with('\'') {
346                            let lit_span = self.mk_sp(start, self.pos + BytePos(1));
347                            let contents = self.str_from_to(start + BytePos(1), self.pos);
348                            emit_unescape_error(
349                                self.dcx(),
350                                contents,
351                                lit_span,
352                                lit_span,
353                                Mode::Char,
354                                0..contents.len(),
355                                EscapeError::MoreThanOneChar,
356                            )
357                            .expect("expected error");
358                        }
359
360                        let span = self.mk_sp(start, self.pos);
361
362                        let lifetime_name_without_tick =
363                            Symbol::intern(&self.str_from(ident_start));
364                        if !lifetime_name_without_tick.can_be_raw() {
365                            self.dcx().emit_err(
366                                errors::CannotBeRawLifetime {
367                                    span,
368                                    ident: lifetime_name_without_tick
369                                }
370                            );
371                        }
372
373                        // Put the `'` back onto the lifetime name.
374                        let mut lifetime_name =
375                            String::with_capacity(lifetime_name_without_tick.as_str().len() + 1);
376                        lifetime_name.push('\'');
377                        lifetime_name += lifetime_name_without_tick.as_str();
378                        let sym = nfc_normalize(&lifetime_name);
379
380                        // Make sure we mark this as a raw identifier.
381                        self.psess.raw_identifier_spans.push(span);
382
383                        token::Lifetime(sym, IdentIsRaw::Yes)
384                    } else {
385                        // Otherwise, this should be parsed like `'r`. Warn about it though.
386                        self.psess.buffer_lint(
387                            RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
388                            prefix_span,
389                            ast::CRATE_NODE_ID,
390                            errors::RawPrefix {
391                                label: prefix_span,
392                                suggestion: prefix_span.shrink_to_hi()
393                            },
394                        );
395
396                        // Reset the state so we just lex the `'r`.
397                        let lt_start = start + BytePos(2);
398                        self.pos = lt_start;
399                        self.cursor = Cursor::new(&str_before[2 as usize..], FrontmatterAllowed::No);
400
401                        let lifetime_name = nfc_normalize(self.str_from(start));
402                        token::Lifetime(lifetime_name, IdentIsRaw::No)
403                    }
404                }
405                rustc_lexer::TokenKind::Semi => token::Semi,
406                rustc_lexer::TokenKind::Comma => token::Comma,
407                rustc_lexer::TokenKind::Dot => token::Dot,
408                rustc_lexer::TokenKind::OpenParen => token::OpenParen,
409                rustc_lexer::TokenKind::CloseParen => token::CloseParen,
410                rustc_lexer::TokenKind::OpenBrace => token::OpenBrace,
411                rustc_lexer::TokenKind::CloseBrace => token::CloseBrace,
412                rustc_lexer::TokenKind::OpenBracket => token::OpenBracket,
413                rustc_lexer::TokenKind::CloseBracket => token::CloseBracket,
414                rustc_lexer::TokenKind::At => token::At,
415                rustc_lexer::TokenKind::Pound => token::Pound,
416                rustc_lexer::TokenKind::Tilde => token::Tilde,
417                rustc_lexer::TokenKind::Question => token::Question,
418                rustc_lexer::TokenKind::Colon => token::Colon,
419                rustc_lexer::TokenKind::Dollar => token::Dollar,
420                rustc_lexer::TokenKind::Eq => token::Eq,
421                rustc_lexer::TokenKind::Bang => token::Bang,
422                rustc_lexer::TokenKind::Lt => token::Lt,
423                rustc_lexer::TokenKind::Gt => token::Gt,
424                rustc_lexer::TokenKind::Minus => token::Minus,
425                rustc_lexer::TokenKind::And => token::And,
426                rustc_lexer::TokenKind::Or => token::Or,
427                rustc_lexer::TokenKind::Plus => token::Plus,
428                rustc_lexer::TokenKind::Star => token::Star,
429                rustc_lexer::TokenKind::Slash => token::Slash,
430                rustc_lexer::TokenKind::Caret => token::Caret,
431                rustc_lexer::TokenKind::Percent => token::Percent,
432
433                rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => {
434                    // Don't emit diagnostics for sequences of the same invalid token
435                    if swallow_next_invalid > 0 {
436                        swallow_next_invalid -= 1;
437                        continue;
438                    }
439                    let mut it = self.str_from_to_end(start).chars();
440                    let c = it.next().unwrap();
441                    if c == '\u{00a0}' {
442                        // If an error has already been reported on non-breaking
443                        // space characters earlier in the file, treat all
444                        // subsequent occurrences as whitespace.
445                        if self.nbsp_is_whitespace {
446                            preceded_by_whitespace = true;
447                            continue;
448                        }
449                        self.nbsp_is_whitespace = true;
450                    }
451                    let repeats = it.take_while(|c1| *c1 == c).count();
452                    // FIXME: the lexer could be used to turn the ASCII version of unicode
453                    // homoglyphs, instead of keeping a table in `check_for_substitution`into the
454                    // token. Ideally, this should be inside `rustc_lexer`. However, we should
455                    // first remove compound tokens like `<<` from `rustc_lexer`, and then add
456                    // fancier error recovery to it, as there will be less overall work to do this
457                    // way.
458                    let (token, sugg) =
459                        unicode_chars::check_for_substitution(self, start, c, repeats + 1);
460                    self.dcx().emit_err(errors::UnknownTokenStart {
461                        span: self.mk_sp(start, self.pos + Pos::from_usize(repeats * c.len_utf8())),
462                        escaped: escaped_char(c),
463                        sugg,
464                        null: c == '\x00',
465                        invisible: INVISIBLE_CHARACTERS.contains(&c),
466                        repeat: if repeats > 0 {
467                            swallow_next_invalid = repeats;
468                            Some(errors::UnknownTokenRepeat { repeats })
469                        } else {
470                            None
471                        },
472                    });
473
474                    if let Some(token) = token {
475                        token
476                    } else {
477                        preceded_by_whitespace = true;
478                        continue;
479                    }
480                }
481                rustc_lexer::TokenKind::Eof => token::Eof,
482            };
483            let span = self.mk_sp(start, self.pos);
484            return (Token::new(kind, span), preceded_by_whitespace);
485        }
486    }
487
    /// Cooks a plain identifier: NFC-normalizes the source text from `start`
    /// to the current position, records it in the symbol gallery, and returns
    /// a non-raw `Ident` token kind.
    fn ident(&self, start: BytePos) -> TokenKind {
        let sym = nfc_normalize(self.str_from(start));
        let span = self.mk_sp(start, self.pos);
        self.psess.symbol_gallery.insert(sym, span);
        token::Ident(sym, IdentIsRaw::No)
    }
494
495    /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
496    /// complain about it.
497    fn lint_unicode_text_flow(&self, start: BytePos) {
498        // Opening delimiter of the length 2 is not included into the comment text.
499        let content_start = start + BytePos(2);
500        let content = self.str_from(content_start);
501        if contains_text_flow_control_chars(content) {
502            let span = self.mk_sp(start, self.pos);
503            let content = content.to_string();
504            self.psess.dyn_buffer_lint(
505                TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
506                span,
507                ast::CRATE_NODE_ID,
508                move |dcx, level| {
509                    let spans: Vec<_> = content
510                        .char_indices()
511                        .filter_map(|(i, c)| {
512                            TEXT_FLOW_CONTROL_CHARS.contains(&c).then(|| {
513                                let lo = span.lo() + BytePos(2 + i as u32);
514                                (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
515                            })
516                        })
517                        .collect();
518                    let characters = spans
519                        .iter()
520                        .map(|&(c, span)| errors::UnicodeCharNoteSub {
521                            span,
522                            c_debug: ::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("{0:?}", c))
    })format!("{c:?}"),
523                        })
524                        .collect();
525                    let suggestions =
526                        (!spans.is_empty()).then_some(errors::UnicodeTextFlowSuggestion {
527                            spans: spans.iter().map(|(_c, span)| *span).collect(),
528                        });
529
530                    errors::UnicodeTextFlow {
531                        comment_span: span,
532                        characters,
533                        suggestions,
534                        num_codepoints: spans.len(),
535                    }
536                    .into_diag(dcx, level)
537                },
538            );
539        }
540    }
541
542    fn lint_doc_comment_unicode_text_flow(&mut self, start: BytePos, content: &str) {
543        if contains_text_flow_control_chars(content) {
544            self.report_text_direction_codepoint(
545                content,
546                self.mk_sp(start, self.pos),
547                0,
548                false,
549                true,
550                "doc comment",
551            );
552        }
553    }
554
    /// Buffers a lint when a literal contains Unicode text-flow control
    /// codepoints. The `padding` accounts for the literal's opening
    /// quote/prefix bytes so per-char spans land inside the literal text.
    fn lint_literal_unicode_text_flow(
        &mut self,
        text: Symbol,
        lit_kind: token::LitKind,
        span: Span,
        label: &'static str,
    ) {
        if !contains_text_flow_control_chars(text.as_str()) {
            return;
        }
        let (padding, point_at_inner_spans) = match lit_kind {
            // account for `"` or `'`
            token::LitKind::Str | token::LitKind::Char => (1, true),
            // account for `c"`
            token::LitKind::CStr => (2, true),
            // account for `r###"`
            token::LitKind::StrRaw(n) => (n as u32 + 2, true),
            // account for `cr###"`
            token::LitKind::CStrRaw(n) => (n as u32 + 3, true),
            // suppress bad literals.
            token::LitKind::Err(_) => return,
            // Be conservative just in case new literals do support these.
            _ => (0, false),
        };
        self.report_text_direction_codepoint(
            text.as_str(),
            span,
            padding,
            point_at_inner_spans,
            false,
            label,
        );
    }
588
589    fn report_text_direction_codepoint(
590        &self,
591        text: &str,
592        span: Span,
593        padding: u32,
594        point_at_inner_spans: bool,
595        is_doc_comment: bool,
596        label: &str,
597    ) {
598        // Obtain the `Span`s for each of the forbidden chars.
599        let spans: Vec<_> = text
600            .char_indices()
601            .filter_map(|(i, c)| {
602                TEXT_FLOW_CONTROL_CHARS.contains(&c).then(|| {
603                    let lo = span.lo() + BytePos(i as u32 + padding);
604                    (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
605                })
606            })
607            .collect();
608
609        let label = label.to_string();
610        let count = spans.len();
611        let labels = point_at_inner_spans
612            .then_some(errors::HiddenUnicodeCodepointsDiagLabels { spans: spans.clone() });
613        let sub = if point_at_inner_spans && !spans.is_empty() {
614            errors::HiddenUnicodeCodepointsDiagSub::Escape { spans }
615        } else {
616            errors::HiddenUnicodeCodepointsDiagSub::NoEscape { spans, is_doc_comment }
617        };
618
619        self.psess.buffer_lint(
620            TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
621            span,
622            ast::CRATE_NODE_ID,
623            errors::HiddenUnicodeCodepointsDiag { label, count, span_label: span, labels, sub },
624        );
625    }
626
627    fn validate_frontmatter(
628        &self,
629        start: BytePos,
630        has_invalid_preceding_whitespace: bool,
631        invalid_infostring: bool,
632    ) {
633        let s = self.str_from(start);
634        let real_start = s.find("---").unwrap();
635        let frontmatter_opening_pos = BytePos(real_start as u32) + start;
636        let real_s = &s[real_start..];
637        let within = real_s.trim_start_matches('-');
638        let len_opening = real_s.len() - within.len();
639
640        let frontmatter_opening_end_pos = frontmatter_opening_pos + BytePos(len_opening as u32);
641        if has_invalid_preceding_whitespace {
642            let line_start =
643                BytePos(s[..real_start].rfind("\n").map_or(0, |i| i as u32 + 1)) + start;
644            let span = self.mk_sp(line_start, frontmatter_opening_end_pos);
645            let label_span = self.mk_sp(line_start, frontmatter_opening_pos);
646            self.dcx().emit_err(errors::FrontmatterInvalidOpeningPrecedingWhitespace {
647                span,
648                note_span: label_span,
649            });
650        }
651
652        let line_end = real_s.find('\n').unwrap_or(real_s.len());
653        if invalid_infostring {
654            let span = self.mk_sp(
655                frontmatter_opening_end_pos,
656                frontmatter_opening_pos + BytePos(line_end as u32),
657            );
658            self.dcx().emit_err(errors::FrontmatterInvalidInfostring { span });
659        }
660
661        let last_line_start = real_s.rfind('\n').map_or(line_end, |i| i + 1);
662
663        let content = &real_s[line_end..last_line_start];
664        if let Some(cr_offset) = content.find('\r') {
665            let cr_pos = start + BytePos((real_start + line_end + cr_offset) as u32);
666            let span = self.mk_sp(cr_pos, cr_pos + BytePos(1 as u32));
667            self.dcx().emit_err(errors::BareCrFrontmatter { span });
668        }
669
670        let last_line = &real_s[last_line_start..];
671        let last_line_trimmed = last_line.trim_start_matches(is_horizontal_whitespace);
672        let last_line_start_pos = frontmatter_opening_pos + BytePos(last_line_start as u32);
673
674        let frontmatter_span = self.mk_sp(frontmatter_opening_pos, self.pos);
675        self.psess.gated_spans.gate(sym::frontmatter, frontmatter_span);
676
677        if !last_line_trimmed.starts_with("---") {
678            let label_span = self.mk_sp(frontmatter_opening_pos, frontmatter_opening_end_pos);
679            self.dcx().emit_err(errors::FrontmatterUnclosed {
680                span: frontmatter_span,
681                note_span: label_span,
682            });
683            return;
684        }
685
686        if last_line_trimmed.len() != last_line.len() {
687            let line_end = last_line_start_pos + BytePos(last_line.len() as u32);
688            let span = self.mk_sp(last_line_start_pos, line_end);
689            let whitespace_end =
690                last_line_start_pos + BytePos((last_line.len() - last_line_trimmed.len()) as u32);
691            let label_span = self.mk_sp(last_line_start_pos, whitespace_end);
692            self.dcx().emit_err(errors::FrontmatterInvalidClosingPrecedingWhitespace {
693                span,
694                note_span: label_span,
695            });
696        }
697
698        let rest = last_line_trimmed.trim_start_matches('-');
699        let len_close = last_line_trimmed.len() - rest.len();
700        if len_close != len_opening {
701            let span = self.mk_sp(frontmatter_opening_pos, self.pos);
702            let opening = self.mk_sp(frontmatter_opening_pos, frontmatter_opening_end_pos);
703            let last_line_close_pos = last_line_start_pos + BytePos(len_close as u32);
704            let close = self.mk_sp(last_line_start_pos, last_line_close_pos);
705            self.dcx().emit_err(errors::FrontmatterLengthMismatch {
706                span,
707                opening,
708                close,
709                len_opening,
710                len_close,
711            });
712        }
713
714        // Only up to 255 `-`s are allowed in code fences
715        if u8::try_from(len_opening).is_err() {
716            self.dcx().emit_err(errors::FrontmatterTooManyDashes { len_opening });
717        }
718
719        if !rest.trim_matches(is_horizontal_whitespace).is_empty() {
720            let span = self.mk_sp(last_line_start_pos, self.pos);
721            self.dcx().emit_err(errors::FrontmatterExtraCharactersAfterClose { span });
722        }
723    }
724
725    fn cook_doc_comment(
726        &self,
727        content_start: BytePos,
728        content: &str,
729        comment_kind: CommentKind,
730        doc_style: DocStyle,
731    ) -> TokenKind {
732        if content.contains('\r') {
733            for (idx, _) in content.char_indices().filter(|&(_, c)| c == '\r') {
734                let span = self.mk_sp(
735                    content_start + BytePos(idx as u32),
736                    content_start + BytePos(idx as u32 + 1),
737                );
738                let block = #[allow(non_exhaustive_omitted_patterns)] match comment_kind {
    CommentKind::Block => true,
    _ => false,
}matches!(comment_kind, CommentKind::Block);
739                self.dcx().emit_err(errors::CrDocComment { span, block });
740            }
741        }
742
743        let attr_style = match doc_style {
744            DocStyle::Outer => AttrStyle::Outer,
745            DocStyle::Inner => AttrStyle::Inner,
746        };
747
748        token::DocComment(comment_kind, attr_style, Symbol::intern(content))
749    }
750
751    fn cook_lexer_literal(
752        &self,
753        start: BytePos,
754        end: BytePos,
755        kind: rustc_lexer::LiteralKind,
756    ) -> (token::LitKind, Symbol) {
757        match kind {
758            rustc_lexer::LiteralKind::Char { terminated } => {
759                if !terminated {
760                    let mut err = self
761                        .dcx()
762                        .struct_span_fatal(self.mk_sp(start, end), "unterminated character literal")
763                        .with_code(E0762);
764                    if let Some(lt_sp) = self.last_lifetime {
765                        err.multipart_suggestion(
766                            "if you meant to write a string literal, use double quotes",
767                            ::alloc::boxed::box_assume_init_into_vec_unsafe(::alloc::intrinsics::write_box_via_move(::alloc::boxed::Box::new_uninit(),
        [(lt_sp, "\"".to_string()),
                (self.mk_sp(start, start + BytePos(1)), "\"".to_string())]))vec![
768                                (lt_sp, "\"".to_string()),
769                                (self.mk_sp(start, start + BytePos(1)), "\"".to_string()),
770                            ],
771                            Applicability::MaybeIncorrect,
772                        );
773                    }
774                    err.emit()
775                }
776                self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' '
777            }
778            rustc_lexer::LiteralKind::Byte { terminated } => {
779                if !terminated {
780                    self.dcx()
781                        .struct_span_fatal(
782                            self.mk_sp(start + BytePos(1), end),
783                            "unterminated byte constant",
784                        )
785                        .with_code(E0763)
786                        .emit()
787                }
788                self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
789            }
790            rustc_lexer::LiteralKind::Str { terminated } => {
791                if !terminated {
792                    self.dcx()
793                        .struct_span_fatal(
794                            self.mk_sp(start, end),
795                            "unterminated double quote string",
796                        )
797                        .with_code(E0765)
798                        .emit()
799                }
800                self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " "
801            }
802            rustc_lexer::LiteralKind::ByteStr { terminated } => {
803                if !terminated {
804                    self.dcx()
805                        .struct_span_fatal(
806                            self.mk_sp(start + BytePos(1), end),
807                            "unterminated double quote byte string",
808                        )
809                        .with_code(E0766)
810                        .emit()
811                }
812                self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1)
813                // b" "
814            }
815            rustc_lexer::LiteralKind::CStr { terminated } => {
816                if !terminated {
817                    self.dcx()
818                        .struct_span_fatal(
819                            self.mk_sp(start + BytePos(1), end),
820                            "unterminated C string",
821                        )
822                        .with_code(E0767)
823                        .emit()
824                }
825                self.cook_quoted(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
826            }
827            rustc_lexer::LiteralKind::RawStr { n_hashes } => {
828                if let Some(n_hashes) = n_hashes {
829                    let n = u32::from(n_hashes);
830                    let kind = token::StrRaw(n_hashes);
831                    self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n)
832                // r##" "##
833                } else {
834                    self.report_raw_str_error(start, 1);
835                }
836            }
837            rustc_lexer::LiteralKind::RawByteStr { n_hashes } => {
838                if let Some(n_hashes) = n_hashes {
839                    let n = u32::from(n_hashes);
840                    let kind = token::ByteStrRaw(n_hashes);
841                    self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n)
842                // br##" "##
843                } else {
844                    self.report_raw_str_error(start, 2);
845                }
846            }
847            rustc_lexer::LiteralKind::RawCStr { n_hashes } => {
848                if let Some(n_hashes) = n_hashes {
849                    let n = u32::from(n_hashes);
850                    let kind = token::CStrRaw(n_hashes);
851                    self.cook_quoted(kind, Mode::RawCStr, start, end, 3 + n, 1 + n)
852                // cr##" "##
853                } else {
854                    self.report_raw_str_error(start, 2);
855                }
856            }
857            rustc_lexer::LiteralKind::Int { base, empty_int } => {
858                let mut kind = token::Integer;
859                if empty_int {
860                    let span = self.mk_sp(start, end);
861                    let guar = self.dcx().emit_err(errors::NoDigitsLiteral { span });
862                    kind = token::Err(guar);
863                } else if #[allow(non_exhaustive_omitted_patterns)] match base {
    Base::Binary | Base::Octal => true,
    _ => false,
}matches!(base, Base::Binary | Base::Octal) {
864                    let base = base as u32;
865                    let s = self.str_from_to(start + BytePos(2), end);
866                    for (idx, c) in s.char_indices() {
867                        let span = self.mk_sp(
868                            start + BytePos::from_usize(2 + idx),
869                            start + BytePos::from_usize(2 + idx + c.len_utf8()),
870                        );
871                        if c != '_' && c.to_digit(base).is_none() {
872                            let guar =
873                                self.dcx().emit_err(errors::InvalidDigitLiteral { span, base });
874                            kind = token::Err(guar);
875                        }
876                    }
877                }
878                (kind, self.symbol_from_to(start, end))
879            }
880            rustc_lexer::LiteralKind::Float { base, empty_exponent } => {
881                let mut kind = token::Float;
882                if empty_exponent {
883                    let span = self.mk_sp(start, self.pos);
884                    let guar = self.dcx().emit_err(errors::EmptyExponentFloat { span });
885                    kind = token::Err(guar);
886                }
887                let base = match base {
888                    Base::Hexadecimal => Some("hexadecimal"),
889                    Base::Octal => Some("octal"),
890                    Base::Binary => Some("binary"),
891                    _ => None,
892                };
893                if let Some(base) = base {
894                    let span = self.mk_sp(start, end);
895                    let guar =
896                        self.dcx().emit_err(errors::FloatLiteralUnsupportedBase { span, base });
897                    kind = token::Err(guar)
898                }
899                (kind, self.symbol_from_to(start, end))
900            }
901        }
902    }
903
904    #[inline]
905    fn src_index(&self, pos: BytePos) -> usize {
906        (pos - self.start_pos).to_usize()
907    }
908
909    /// Slice of the source text from `start` up to but excluding `self.pos`,
910    /// meaning the slice does not include the character `self.ch`.
911    fn str_from(&self, start: BytePos) -> &'src str {
912        self.str_from_to(start, self.pos)
913    }
914
915    /// As symbol_from, with an explicit endpoint.
916    fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol {
917        {
    use ::tracing::__macro_support::Callsite as _;
    static __CALLSITE: ::tracing::callsite::DefaultCallsite =
        {
            static META: ::tracing::Metadata<'static> =
                {
                    ::tracing_core::metadata::Metadata::new("event compiler/rustc_parse/src/lexer/mod.rs:917",
                        "rustc_parse::lexer", ::tracing::Level::DEBUG,
                        ::tracing_core::__macro_support::Option::Some("compiler/rustc_parse/src/lexer/mod.rs"),
                        ::tracing_core::__macro_support::Option::Some(917u32),
                        ::tracing_core::__macro_support::Option::Some("rustc_parse::lexer"),
                        ::tracing_core::field::FieldSet::new(&["message"],
                            ::tracing_core::callsite::Identifier(&__CALLSITE)),
                        ::tracing::metadata::Kind::EVENT)
                };
            ::tracing::callsite::DefaultCallsite::new(&META)
        };
    let enabled =
        ::tracing::Level::DEBUG <= ::tracing::level_filters::STATIC_MAX_LEVEL
                &&
                ::tracing::Level::DEBUG <=
                    ::tracing::level_filters::LevelFilter::current() &&
            {
                let interest = __CALLSITE.interest();
                !interest.is_never() &&
                    ::tracing::__macro_support::__is_enabled(__CALLSITE.metadata(),
                        interest)
            };
    if enabled {
        (|value_set: ::tracing::field::ValueSet|
                    {
                        let meta = __CALLSITE.metadata();
                        ::tracing::Event::dispatch(meta, &value_set);
                        ;
                    })({
                #[allow(unused_imports)]
                use ::tracing::field::{debug, display, Value};
                let mut iter = __CALLSITE.metadata().fields().iter();
                __CALLSITE.metadata().fields().value_set(&[(&::tracing::__macro_support::Iterator::next(&mut iter).expect("FieldSet corrupted (this is a bug)"),
                                    ::tracing::__macro_support::Option::Some(&format_args!("taking an ident from {0:?} to {1:?}",
                                                    start, end) as &dyn Value))])
            });
    } else { ; }
};debug!("taking an ident from {:?} to {:?}", start, end);
918        Symbol::intern(self.str_from_to(start, end))
919    }
920
921    /// Slice of the source text spanning from `start` up to but excluding `end`.
922    fn str_from_to(&self, start: BytePos, end: BytePos) -> &'src str {
923        &self.src[self.src_index(start)..self.src_index(end)]
924    }
925
926    /// Slice of the source text spanning from `start` until the end
927    fn str_from_to_end(&self, start: BytePos) -> &'src str {
928        &self.src[self.src_index(start)..]
929    }
930
931    fn report_raw_str_error(&self, start: BytePos, prefix_len: u32) -> ! {
932        match rustc_lexer::validate_raw_str(self.str_from(start), prefix_len) {
933            Err(RawStrError::InvalidStarter { bad_char }) => {
934                self.report_non_started_raw_string(start, bad_char)
935            }
936            Err(RawStrError::NoTerminator { expected, found, possible_terminator_offset }) => self
937                .report_unterminated_raw_string(start, expected, possible_terminator_offset, found),
938            Err(RawStrError::TooManyDelimiters { found }) => {
939                self.report_too_many_hashes(start, found)
940            }
941            Ok(()) => {
    ::core::panicking::panic_fmt(format_args!("no error found for supposedly invalid raw string literal"));
}panic!("no error found for supposedly invalid raw string literal"),
942        }
943    }
944
945    fn report_non_started_raw_string(&self, start: BytePos, bad_char: char) -> ! {
946        self.dcx()
947            .struct_span_fatal(
948                self.mk_sp(start, self.pos),
949                ::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("found invalid character; only `#` is allowed in raw string delimitation: {0}",
                escaped_char(bad_char)))
    })format!(
950                    "found invalid character; only `#` is allowed in raw string delimitation: {}",
951                    escaped_char(bad_char)
952                ),
953            )
954            .emit()
955    }
956
957    fn report_unterminated_raw_string(
958        &self,
959        start: BytePos,
960        n_hashes: u32,
961        possible_offset: Option<u32>,
962        found_terminators: u32,
963    ) -> ! {
964        let mut err =
965            self.dcx().struct_span_fatal(self.mk_sp(start, start), "unterminated raw string");
966        err.code(E0748);
967        err.span_label(self.mk_sp(start, start), "unterminated raw string");
968
969        if n_hashes > 0 {
970            err.note(::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("this raw string should be terminated with `\"{0}`",
                "#".repeat(n_hashes as usize)))
    })format!(
971                "this raw string should be terminated with `\"{}`",
972                "#".repeat(n_hashes as usize)
973            ));
974        }
975
976        if let Some(possible_offset) = possible_offset {
977            let lo = start + BytePos(possible_offset);
978            let hi = lo + BytePos(found_terminators);
979            let span = self.mk_sp(lo, hi);
980            err.span_suggestion(
981                span,
982                "consider terminating the string here",
983                "#".repeat(n_hashes as usize),
984                Applicability::MaybeIncorrect,
985            );
986        }
987
988        err.emit()
989    }
990
991    fn report_unterminated_block_comment(&self, start: BytePos, doc_style: Option<DocStyle>) {
992        let msg = match doc_style {
993            Some(_) => "unterminated block doc-comment",
994            None => "unterminated block comment",
995        };
996        let last_bpos = self.pos;
997        let mut err = self.dcx().struct_span_fatal(self.mk_sp(start, last_bpos), msg);
998        err.code(E0758);
999        let mut nested_block_comment_open_idxs = ::alloc::vec::Vec::new()vec![];
1000        let mut last_nested_block_comment_idxs = None;
1001        let mut content_chars = self.str_from(start).char_indices().peekable();
1002
1003        while let Some((idx, current_char)) = content_chars.next() {
1004            match content_chars.peek() {
1005                Some((_, '*')) if current_char == '/' => {
1006                    nested_block_comment_open_idxs.push(idx);
1007                }
1008                Some((_, '/')) if current_char == '*' => {
1009                    last_nested_block_comment_idxs =
1010                        nested_block_comment_open_idxs.pop().map(|open_idx| (open_idx, idx));
1011                }
1012                _ => {}
1013            };
1014        }
1015
1016        if let Some((nested_open_idx, nested_close_idx)) = last_nested_block_comment_idxs {
1017            err.span_label(self.mk_sp(start, start + BytePos(2)), msg)
1018                .span_label(
1019                    self.mk_sp(
1020                        start + BytePos(nested_open_idx as u32),
1021                        start + BytePos(nested_open_idx as u32 + 2),
1022                    ),
1023                    "...as last nested comment starts here, maybe you want to close this instead?",
1024                )
1025                .span_label(
1026                    self.mk_sp(
1027                        start + BytePos(nested_close_idx as u32),
1028                        start + BytePos(nested_close_idx as u32 + 2),
1029                    ),
1030                    "...and last nested comment terminates here.",
1031                );
1032        }
1033
1034        err.emit();
1035    }
1036
1037    // RFC 3101 introduced the idea of (reserved) prefixes. As of Rust 2021,
1038    // using a (unknown) prefix is an error. In earlier editions, however, they
1039    // only result in a (allowed by default) lint, and are treated as regular
1040    // identifier tokens.
1041    fn report_unknown_prefix(&self, start: BytePos) {
1042        let prefix_span = self.mk_sp(start, self.pos);
1043        let prefix = self.str_from_to(start, self.pos);
1044        let expn_data = prefix_span.ctxt().outer_expn_data();
1045
1046        if expn_data.edition.at_least_rust_2021() {
1047            // In Rust 2021, this is a hard error.
1048            let sugg = if prefix == "rb" {
1049                Some(errors::UnknownPrefixSugg::UseBr(prefix_span))
1050            } else if prefix == "rc" {
1051                Some(errors::UnknownPrefixSugg::UseCr(prefix_span))
1052            } else if expn_data.is_root() {
1053                if self.cursor.first() == '\''
1054                    && let Some(start) = self.last_lifetime
1055                    && self.cursor.third() != '\''
1056                    && let end = self.mk_sp(self.pos, self.pos + BytePos(1))
1057                    && !self.psess.source_map().is_multiline(start.until(end))
1058                {
1059                    // FIXME: An "unclosed `char`" error will be emitted already in some cases,
1060                    // but it's hard to silence this error while not also silencing important cases
1061                    // too. We should use the error stashing machinery instead.
1062                    Some(errors::UnknownPrefixSugg::MeantStr { start, end })
1063                } else {
1064                    Some(errors::UnknownPrefixSugg::Whitespace(prefix_span.shrink_to_hi()))
1065                }
1066            } else {
1067                None
1068            };
1069            self.dcx().emit_err(errors::UnknownPrefix { span: prefix_span, prefix, sugg });
1070        } else {
1071            // Before Rust 2021, only emit a lint for migration.
1072            self.psess.buffer_lint(
1073                RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
1074                prefix_span,
1075                ast::CRATE_NODE_ID,
1076                errors::ReservedPrefix {
1077                    label: prefix_span,
1078                    suggestion: prefix_span.shrink_to_hi(),
1079                    prefix: prefix.to_string(),
1080                },
1081            );
1082        }
1083    }
1084
1085    /// Detect guarded string literal syntax
1086    ///
1087    /// RFC 3593 reserved this syntax for future use. As of Rust 2024,
1088    /// using this syntax produces an error. In earlier editions, however, it
1089    /// only results in an (allowed by default) lint, and is treated as
1090    /// separate tokens.
1091    fn maybe_report_guarded_str(&mut self, start: BytePos, str_before: &'src str) -> TokenKind {
1092        let span = self.mk_sp(start, self.pos);
1093        let edition2024 = span.edition().at_least_rust_2024();
1094
1095        let space_pos = start + BytePos(1);
1096        let space_span = self.mk_sp(space_pos, space_pos);
1097
1098        let mut cursor = Cursor::new(str_before, FrontmatterAllowed::No);
1099
1100        let (is_string, span, unterminated) = match cursor.guarded_double_quoted_string() {
1101            Some(rustc_lexer::GuardedStr { n_hashes, terminated, token_len }) => {
1102                let end = start + BytePos(token_len);
1103                let span = self.mk_sp(start, end);
1104                let str_start = start + BytePos(n_hashes);
1105
1106                if edition2024 {
1107                    self.cursor = cursor;
1108                    self.pos = end;
1109                }
1110
1111                let unterminated = if terminated { None } else { Some(str_start) };
1112
1113                (true, span, unterminated)
1114            }
1115            None => {
1116                // We should only get here in the `##+` case.
1117                if true {
    match (&self.str_from_to(start, start + BytePos(2)), &"##") {
        (left_val, right_val) => {
            if !(*left_val == *right_val) {
                let kind = ::core::panicking::AssertKind::Eq;
                ::core::panicking::assert_failed(kind, &*left_val,
                    &*right_val, ::core::option::Option::None);
            }
        }
    };
};debug_assert_eq!(self.str_from_to(start, start + BytePos(2)), "##");
1118
1119                (false, span, None)
1120            }
1121        };
1122        if edition2024 {
1123            if let Some(str_start) = unterminated {
1124                // Only a fatal error if string is unterminated.
1125                self.dcx()
1126                    .struct_span_fatal(
1127                        self.mk_sp(str_start, self.pos),
1128                        "unterminated double quote string",
1129                    )
1130                    .with_code(E0765)
1131                    .emit()
1132            }
1133
1134            let sugg = if span.from_expansion() {
1135                None
1136            } else {
1137                Some(errors::GuardedStringSugg(space_span))
1138            };
1139
1140            // In Edition 2024 and later, emit a hard error.
1141            let err = if is_string {
1142                self.dcx().emit_err(errors::ReservedString { span, sugg })
1143            } else {
1144                self.dcx().emit_err(errors::ReservedMultihash { span, sugg })
1145            };
1146
1147            token::Literal(token::Lit {
1148                kind: token::Err(err),
1149                symbol: self.symbol_from_to(start, self.pos),
1150                suffix: None,
1151            })
1152        } else {
1153            // Before Rust 2024, only emit a lint for migration.
1154            self.psess.dyn_buffer_lint(
1155                RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
1156                span,
1157                ast::CRATE_NODE_ID,
1158                move |dcx, level| {
1159                    if is_string {
1160                        errors::ReservedStringLint { suggestion: space_span }.into_diag(dcx, level)
1161                    } else {
1162                        errors::ReservedMultihashLint { suggestion: space_span }
1163                            .into_diag(dcx, level)
1164                    }
1165                },
1166            );
1167
1168            // For backwards compatibility, roll back to after just the first `#`
1169            // and return the `Pound` token.
1170            self.pos = start + BytePos(1);
1171            self.cursor = Cursor::new(&str_before[1..], FrontmatterAllowed::No);
1172            token::Pound
1173        }
1174    }
1175
1176    fn report_too_many_hashes(&self, start: BytePos, num: u32) -> ! {
1177        self.dcx().emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
1178    }
1179
1180    fn cook_quoted(
1181        &self,
1182        mut kind: token::LitKind,
1183        mode: Mode,
1184        start: BytePos,
1185        end: BytePos,
1186        prefix_len: u32,
1187        postfix_len: u32,
1188    ) -> (token::LitKind, Symbol) {
1189        let content_start = start + BytePos(prefix_len);
1190        let content_end = end - BytePos(postfix_len);
1191        let lit_content = self.str_from_to(content_start, content_end);
1192        check_for_errors(lit_content, mode, |range, err| {
1193            let span_with_quotes = self.mk_sp(start, end);
1194            let (start, end) = (range.start as u32, range.end as u32);
1195            let lo = content_start + BytePos(start);
1196            let hi = lo + BytePos(end - start);
1197            let span = self.mk_sp(lo, hi);
1198            let is_fatal = err.is_fatal();
1199            if let Some(guar) = emit_unescape_error(
1200                self.dcx(),
1201                lit_content,
1202                span_with_quotes,
1203                span,
1204                mode,
1205                range,
1206                err,
1207            ) {
1208                if !is_fatal { ::core::panicking::panic("assertion failed: is_fatal") };assert!(is_fatal);
1209                kind = token::Err(guar);
1210            }
1211        });
1212
1213        // We normally exclude the quotes for the symbol, but for errors we
1214        // include it because it results in clearer error messages.
1215        let sym = if !#[allow(non_exhaustive_omitted_patterns)] match kind {
    token::Err(_) => true,
    _ => false,
}matches!(kind, token::Err(_)) {
1216            Symbol::intern(lit_content)
1217        } else {
1218            self.symbol_from_to(start, end)
1219        };
1220        (kind, sym)
1221    }
1222}
1223
1224pub fn nfc_normalize(string: &str) -> Symbol {
1225    use unicode_normalization::{IsNormalized, UnicodeNormalization, is_nfc_quick};
1226    match is_nfc_quick(string.chars()) {
1227        IsNormalized::Yes => Symbol::intern(string),
1228        _ => {
1229            let normalized_str: String = string.chars().nfc().collect();
1230            Symbol::intern(&normalized_str)
1231        }
1232    }
1233}