Skip to main content

rustc_lexer/
lib.rs

//! Low-level Rust lexer.
//!
//! The idea with `rustc_lexer` is to make a reusable library,
//! by separating out pure lexing and rustc-specific concerns, like spans,
//! error reporting, and interning. So, `rustc_lexer` operates directly on `&str`,
//! produces simple tokens which are a pair of type-tag and a bit of original text,
//! and does not report errors, instead storing them as flags on the token.
//!
//! Tokens produced by this lexer are not yet ready for parsing the Rust syntax.
//! For that see [`rustc_parse::lexer`], which converts this basic token stream
//! into wide tokens used by the actual parser.
//!
//! The purpose of this crate is to convert raw sources into a labeled sequence
//! of well-known token types, so building an actual Rust token stream will
//! be easier.
//!
//! The main entity of this crate is the [`TokenKind`] enum which represents common
//! lexeme types.
//!
//! [`rustc_parse::lexer`]: ../rustc_parse/lexer/index.html
21
22// tidy-alphabetical-start
23// We want to be able to build this crate with a stable compiler,
24// so no `#![feature]` attributes should be added.
25#![deny(unstable_features)]
26// tidy-alphabetical-end
27
28#[cfg(test)]
29mod tests;
30
31use std::str::Chars;
32
33use LiteralKind::*;
34use TokenKind::*;
35pub use unicode_ident::UNICODE_VERSION;
36use unicode_properties::UnicodeEmoji;
37
38// Make sure that the Unicode version of the dependencies is the same.
39const _: () = {
40    let properties = unicode_properties::UNICODE_VERSION;
41    let ident = unicode_ident::UNICODE_VERSION;
42
43    if properties.0 != ident.0 as u64
44        || properties.1 != ident.1 as u64
45        || properties.2 != ident.2 as u64
46    {
47        {
    ::core::panicking::panic_fmt(format_args!("unicode-properties and unicode-ident must use the same Unicode version, `unicode_properties::UNICODE_VERSION` and `unicode_ident::UNICODE_VERSION` are different."));
};panic!(
48            "unicode-properties and unicode-ident must use the same Unicode version, \
49            `unicode_properties::UNICODE_VERSION` and `unicode_ident::UNICODE_VERSION` are \
50            different."
51        );
52    }
53};
54
55/// Parsed token.
56/// It doesn't contain information about data that has been parsed,
57/// only the type of the token and its size.
58#[derive(#[automatically_derived]
impl ::core::fmt::Debug for Token {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_struct_field2_finish(f, "Token", "kind",
            &self.kind, "len", &&self.len)
    }
}Debug)]
59pub struct Token {
60    pub kind: TokenKind,
61    pub len: u32,
62}
63
64impl Token {
65    fn new(kind: TokenKind, len: u32) -> Token {
66        Token { kind, len }
67    }
68}
69
70/// Enum representing common lexeme types.
71#[derive(#[automatically_derived]
impl ::core::clone::Clone for TokenKind {
    #[inline]
    fn clone(&self) -> TokenKind {
        let _: ::core::clone::AssertParamIsClone<Option<DocStyle>>;
        let _: ::core::clone::AssertParamIsClone<Option<DocStyle>>;
        let _: ::core::clone::AssertParamIsClone<bool>;
        let _: ::core::clone::AssertParamIsClone<LiteralKind>;
        let _: ::core::clone::AssertParamIsClone<u32>;
        *self
    }
}Clone, #[automatically_derived]
impl ::core::marker::Copy for TokenKind { }Copy, #[automatically_derived]
impl ::core::fmt::Debug for TokenKind {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        match self {
            TokenKind::LineComment { doc_style: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f,
                    "LineComment", "doc_style", &__self_0),
            TokenKind::BlockComment {
                doc_style: __self_0, terminated: __self_1 } =>
                ::core::fmt::Formatter::debug_struct_field2_finish(f,
                    "BlockComment", "doc_style", __self_0, "terminated",
                    &__self_1),
            TokenKind::Whitespace =>
                ::core::fmt::Formatter::write_str(f, "Whitespace"),
            TokenKind::Frontmatter {
                has_invalid_preceding_whitespace: __self_0,
                invalid_infostring: __self_1 } =>
                ::core::fmt::Formatter::debug_struct_field2_finish(f,
                    "Frontmatter", "has_invalid_preceding_whitespace", __self_0,
                    "invalid_infostring", &__self_1),
            TokenKind::Ident => ::core::fmt::Formatter::write_str(f, "Ident"),
            TokenKind::InvalidIdent =>
                ::core::fmt::Formatter::write_str(f, "InvalidIdent"),
            TokenKind::RawIdent =>
                ::core::fmt::Formatter::write_str(f, "RawIdent"),
            TokenKind::UnknownPrefix =>
                ::core::fmt::Formatter::write_str(f, "UnknownPrefix"),
            TokenKind::UnknownPrefixLifetime =>
                ::core::fmt::Formatter::write_str(f, "UnknownPrefixLifetime"),
            TokenKind::RawLifetime =>
                ::core::fmt::Formatter::write_str(f, "RawLifetime"),
            TokenKind::GuardedStrPrefix =>
                ::core::fmt::Formatter::write_str(f, "GuardedStrPrefix"),
            TokenKind::Literal { kind: __self_0, suffix_start: __self_1 } =>
                ::core::fmt::Formatter::debug_struct_field2_finish(f,
                    "Literal", "kind", __self_0, "suffix_start", &__self_1),
            TokenKind::Lifetime { starts_with_number: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f,
                    "Lifetime", "starts_with_number", &__self_0),
            TokenKind::Semi => ::core::fmt::Formatter::write_str(f, "Semi"),
            TokenKind::Comma => ::core::fmt::Formatter::write_str(f, "Comma"),
            TokenKind::Dot => ::core::fmt::Formatter::write_str(f, "Dot"),
            TokenKind::OpenParen =>
                ::core::fmt::Formatter::write_str(f, "OpenParen"),
            TokenKind::CloseParen =>
                ::core::fmt::Formatter::write_str(f, "CloseParen"),
            TokenKind::OpenBrace =>
                ::core::fmt::Formatter::write_str(f, "OpenBrace"),
            TokenKind::CloseBrace =>
                ::core::fmt::Formatter::write_str(f, "CloseBrace"),
            TokenKind::OpenBracket =>
                ::core::fmt::Formatter::write_str(f, "OpenBracket"),
            TokenKind::CloseBracket =>
                ::core::fmt::Formatter::write_str(f, "CloseBracket"),
            TokenKind::At => ::core::fmt::Formatter::write_str(f, "At"),
            TokenKind::Pound => ::core::fmt::Formatter::write_str(f, "Pound"),
            TokenKind::Tilde => ::core::fmt::Formatter::write_str(f, "Tilde"),
            TokenKind::Question =>
                ::core::fmt::Formatter::write_str(f, "Question"),
            TokenKind::Colon => ::core::fmt::Formatter::write_str(f, "Colon"),
            TokenKind::Dollar =>
                ::core::fmt::Formatter::write_str(f, "Dollar"),
            TokenKind::Eq => ::core::fmt::Formatter::write_str(f, "Eq"),
            TokenKind::Bang => ::core::fmt::Formatter::write_str(f, "Bang"),
            TokenKind::Lt => ::core::fmt::Formatter::write_str(f, "Lt"),
            TokenKind::Gt => ::core::fmt::Formatter::write_str(f, "Gt"),
            TokenKind::Minus => ::core::fmt::Formatter::write_str(f, "Minus"),
            TokenKind::And => ::core::fmt::Formatter::write_str(f, "And"),
            TokenKind::Or => ::core::fmt::Formatter::write_str(f, "Or"),
            TokenKind::Plus => ::core::fmt::Formatter::write_str(f, "Plus"),
            TokenKind::Star => ::core::fmt::Formatter::write_str(f, "Star"),
            TokenKind::Slash => ::core::fmt::Formatter::write_str(f, "Slash"),
            TokenKind::Caret => ::core::fmt::Formatter::write_str(f, "Caret"),
            TokenKind::Percent =>
                ::core::fmt::Formatter::write_str(f, "Percent"),
            TokenKind::Unknown =>
                ::core::fmt::Formatter::write_str(f, "Unknown"),
            TokenKind::Eof => ::core::fmt::Formatter::write_str(f, "Eof"),
        }
    }
}Debug, #[automatically_derived]
impl ::core::cmp::PartialEq for TokenKind {
    #[inline]
    fn eq(&self, other: &TokenKind) -> bool {
        let __self_discr = ::core::intrinsics::discriminant_value(self);
        let __arg1_discr = ::core::intrinsics::discriminant_value(other);
        __self_discr == __arg1_discr &&
            match (self, other) {
                (TokenKind::LineComment { doc_style: __self_0 },
                    TokenKind::LineComment { doc_style: __arg1_0 }) =>
                    __self_0 == __arg1_0,
                (TokenKind::BlockComment {
                    doc_style: __self_0, terminated: __self_1 },
                    TokenKind::BlockComment {
                    doc_style: __arg1_0, terminated: __arg1_1 }) =>
                    __self_1 == __arg1_1 && __self_0 == __arg1_0,
                (TokenKind::Frontmatter {
                    has_invalid_preceding_whitespace: __self_0,
                    invalid_infostring: __self_1 }, TokenKind::Frontmatter {
                    has_invalid_preceding_whitespace: __arg1_0,
                    invalid_infostring: __arg1_1 }) =>
                    __self_0 == __arg1_0 && __self_1 == __arg1_1,
                (TokenKind::Literal { kind: __self_0, suffix_start: __self_1
                    }, TokenKind::Literal {
                    kind: __arg1_0, suffix_start: __arg1_1 }) =>
                    __self_1 == __arg1_1 && __self_0 == __arg1_0,
                (TokenKind::Lifetime { starts_with_number: __self_0 },
                    TokenKind::Lifetime { starts_with_number: __arg1_0 }) =>
                    __self_0 == __arg1_0,
                _ => true,
            }
    }
}PartialEq, #[automatically_derived]
impl ::core::cmp::Eq for TokenKind {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_fields_are_eq(&self) {
        let _: ::core::cmp::AssertParamIsEq<Option<DocStyle>>;
        let _: ::core::cmp::AssertParamIsEq<Option<DocStyle>>;
        let _: ::core::cmp::AssertParamIsEq<bool>;
        let _: ::core::cmp::AssertParamIsEq<LiteralKind>;
        let _: ::core::cmp::AssertParamIsEq<u32>;
    }
}Eq)]
72pub enum TokenKind {
73    /// A line comment, e.g. `// comment`.
74    LineComment {
75        doc_style: Option<DocStyle>,
76    },
77
78    /// A block comment, e.g. `/* block comment */`.
79    ///
80    /// Block comments can be recursive, so a sequence like `/* /* */`
81    /// will not be considered terminated and will result in a parsing error.
82    BlockComment {
83        doc_style: Option<DocStyle>,
84        terminated: bool,
85    },
86
87    /// Any whitespace character sequence.
88    Whitespace,
89
90    Frontmatter {
91        has_invalid_preceding_whitespace: bool,
92        invalid_infostring: bool,
93    },
94
95    /// An identifier or keyword, e.g. `ident` or `continue`.
96    Ident,
97
98    /// An identifier that is invalid because it contains emoji.
99    InvalidIdent,
100
101    /// A raw identifier, e.g. "r#ident".
102    RawIdent,
103
104    /// An unknown literal prefix, like `foo#`, `foo'`, `foo"`. Excludes
105    /// literal prefixes that contain emoji, which are considered "invalid".
106    ///
107    /// Note that only the
108    /// prefix (`foo`) is included in the token, not the separator (which is
109    /// lexed as its own distinct token). In Rust 2021 and later, reserved
110    /// prefixes are reported as errors; in earlier editions, they result in a
111    /// (allowed by default) lint, and are treated as regular identifier
112    /// tokens.
113    UnknownPrefix,
114
115    /// An unknown prefix in a lifetime, like `'foo#`.
116    ///
117    /// Like `UnknownPrefix`, only the `'` and prefix are included in the token
118    /// and not the separator.
119    UnknownPrefixLifetime,
120
121    /// A raw lifetime, e.g. `'r#foo`. In edition < 2021 it will be split into
122    /// several tokens: `'r` and `#` and `foo`.
123    RawLifetime,
124
125    /// Guarded string literal prefix: `#"` or `##`.
126    ///
127    /// Used for reserving "guarded strings" (RFC 3598) in edition 2024.
128    /// Split into the component tokens on older editions.
129    GuardedStrPrefix,
130
131    /// Literals, e.g. `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
132    /// suffix, but may be present here on string and float literals. Users of
133    /// this type will need to check for and reject that case.
134    ///
135    /// See [LiteralKind] for more details.
136    Literal {
137        kind: LiteralKind,
138        suffix_start: u32,
139    },
140
141    /// A lifetime, e.g. `'a`.
142    Lifetime {
143        starts_with_number: bool,
144    },
145
146    /// `;`
147    Semi,
148    /// `,`
149    Comma,
150    /// `.`
151    Dot,
152    /// `(`
153    OpenParen,
154    /// `)`
155    CloseParen,
156    /// `{`
157    OpenBrace,
158    /// `}`
159    CloseBrace,
160    /// `[`
161    OpenBracket,
162    /// `]`
163    CloseBracket,
164    /// `@`
165    At,
166    /// `#`
167    Pound,
168    /// `~`
169    Tilde,
170    /// `?`
171    Question,
172    /// `:`
173    Colon,
174    /// `$`
175    Dollar,
176    /// `=`
177    Eq,
178    /// `!`
179    Bang,
180    /// `<`
181    Lt,
182    /// `>`
183    Gt,
184    /// `-`
185    Minus,
186    /// `&`
187    And,
188    /// `|`
189    Or,
190    /// `+`
191    Plus,
192    /// `*`
193    Star,
194    /// `/`
195    Slash,
196    /// `^`
197    Caret,
198    /// `%`
199    Percent,
200
201    /// Unknown token, not expected by the lexer, e.g. "№"
202    Unknown,
203
204    /// End of input.
205    Eof,
206}
207
/// Style of a documentation comment, carried as `doc_style` on comment tokens.
///
/// NOTE(review): presumably `Outer` corresponds to `///` / `/** */` comments
/// and `Inner` to `//!` / `/*! */` comments, following the usual Rust
/// terminology — confirm against the comment-lexing code.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DocStyle {
    /// Outer doc comment.
    Outer,
    /// Inner doc comment.
    Inner,
}
213
214/// Enum representing the literal types supported by the lexer.
215///
216/// Note that the suffix is *not* considered when deciding the `LiteralKind` in
217/// this type. This means that float literals like `1f32` are classified by this
218/// type as `Int`. (Compare against `rustc_ast::token::LitKind` and
219/// `rustc_ast::ast::LitKind`).
220#[derive(#[automatically_derived]
impl ::core::clone::Clone for LiteralKind {
    #[inline]
    fn clone(&self) -> LiteralKind {
        let _: ::core::clone::AssertParamIsClone<Base>;
        let _: ::core::clone::AssertParamIsClone<bool>;
        let _: ::core::clone::AssertParamIsClone<Option<u8>>;
        let _: ::core::clone::AssertParamIsClone<Option<u8>>;
        let _: ::core::clone::AssertParamIsClone<Option<u8>>;
        *self
    }
}Clone, #[automatically_derived]
impl ::core::marker::Copy for LiteralKind { }Copy, #[automatically_derived]
impl ::core::fmt::Debug for LiteralKind {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        match self {
            LiteralKind::Int { base: __self_0, empty_int: __self_1 } =>
                ::core::fmt::Formatter::debug_struct_field2_finish(f, "Int",
                    "base", __self_0, "empty_int", &__self_1),
            LiteralKind::Float { base: __self_0, empty_exponent: __self_1 } =>
                ::core::fmt::Formatter::debug_struct_field2_finish(f, "Float",
                    "base", __self_0, "empty_exponent", &__self_1),
            LiteralKind::Char { terminated: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f, "Char",
                    "terminated", &__self_0),
            LiteralKind::Byte { terminated: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f, "Byte",
                    "terminated", &__self_0),
            LiteralKind::Str { terminated: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f, "Str",
                    "terminated", &__self_0),
            LiteralKind::ByteStr { terminated: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f,
                    "ByteStr", "terminated", &__self_0),
            LiteralKind::CStr { terminated: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f, "CStr",
                    "terminated", &__self_0),
            LiteralKind::RawStr { n_hashes: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f,
                    "RawStr", "n_hashes", &__self_0),
            LiteralKind::RawByteStr { n_hashes: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f,
                    "RawByteStr", "n_hashes", &__self_0),
            LiteralKind::RawCStr { n_hashes: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f,
                    "RawCStr", "n_hashes", &__self_0),
        }
    }
}Debug, #[automatically_derived]
impl ::core::cmp::PartialEq for LiteralKind {
    #[inline]
    fn eq(&self, other: &LiteralKind) -> bool {
        let __self_discr = ::core::intrinsics::discriminant_value(self);
        let __arg1_discr = ::core::intrinsics::discriminant_value(other);
        __self_discr == __arg1_discr &&
            match (self, other) {
                (LiteralKind::Int { base: __self_0, empty_int: __self_1 },
                    LiteralKind::Int { base: __arg1_0, empty_int: __arg1_1 }) =>
                    __self_1 == __arg1_1 && __self_0 == __arg1_0,
                (LiteralKind::Float { base: __self_0, empty_exponent: __self_1
                    }, LiteralKind::Float {
                    base: __arg1_0, empty_exponent: __arg1_1 }) =>
                    __self_1 == __arg1_1 && __self_0 == __arg1_0,
                (LiteralKind::Char { terminated: __self_0 },
                    LiteralKind::Char { terminated: __arg1_0 }) =>
                    __self_0 == __arg1_0,
                (LiteralKind::Byte { terminated: __self_0 },
                    LiteralKind::Byte { terminated: __arg1_0 }) =>
                    __self_0 == __arg1_0,
                (LiteralKind::Str { terminated: __self_0 }, LiteralKind::Str {
                    terminated: __arg1_0 }) => __self_0 == __arg1_0,
                (LiteralKind::ByteStr { terminated: __self_0 },
                    LiteralKind::ByteStr { terminated: __arg1_0 }) =>
                    __self_0 == __arg1_0,
                (LiteralKind::CStr { terminated: __self_0 },
                    LiteralKind::CStr { terminated: __arg1_0 }) =>
                    __self_0 == __arg1_0,
                (LiteralKind::RawStr { n_hashes: __self_0 },
                    LiteralKind::RawStr { n_hashes: __arg1_0 }) =>
                    __self_0 == __arg1_0,
                (LiteralKind::RawByteStr { n_hashes: __self_0 },
                    LiteralKind::RawByteStr { n_hashes: __arg1_0 }) =>
                    __self_0 == __arg1_0,
                (LiteralKind::RawCStr { n_hashes: __self_0 },
                    LiteralKind::RawCStr { n_hashes: __arg1_0 }) =>
                    __self_0 == __arg1_0,
                _ => unsafe { ::core::intrinsics::unreachable() }
            }
    }
}PartialEq, #[automatically_derived]
impl ::core::cmp::Eq for LiteralKind {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_fields_are_eq(&self) {
        let _: ::core::cmp::AssertParamIsEq<Base>;
        let _: ::core::cmp::AssertParamIsEq<bool>;
        let _: ::core::cmp::AssertParamIsEq<Option<u8>>;
        let _: ::core::cmp::AssertParamIsEq<Option<u8>>;
        let _: ::core::cmp::AssertParamIsEq<Option<u8>>;
    }
}Eq, #[automatically_derived]
impl ::core::cmp::PartialOrd for LiteralKind {
    #[inline]
    fn partial_cmp(&self, other: &LiteralKind)
        -> ::core::option::Option<::core::cmp::Ordering> {
        let __self_discr = ::core::intrinsics::discriminant_value(self);
        let __arg1_discr = ::core::intrinsics::discriminant_value(other);
        match (self, other) {
            (LiteralKind::Int { base: __self_0, empty_int: __self_1 },
                LiteralKind::Int { base: __arg1_0, empty_int: __arg1_1 }) =>
                match ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0)
                    {
                    ::core::option::Option::Some(::core::cmp::Ordering::Equal)
                        => ::core::cmp::PartialOrd::partial_cmp(__self_1, __arg1_1),
                    cmp => cmp,
                },
            (LiteralKind::Float { base: __self_0, empty_exponent: __self_1 },
                LiteralKind::Float { base: __arg1_0, empty_exponent: __arg1_1
                }) =>
                match ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0)
                    {
                    ::core::option::Option::Some(::core::cmp::Ordering::Equal)
                        => ::core::cmp::PartialOrd::partial_cmp(__self_1, __arg1_1),
                    cmp => cmp,
                },
            (LiteralKind::Char { terminated: __self_0 }, LiteralKind::Char {
                terminated: __arg1_0 }) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            (LiteralKind::Byte { terminated: __self_0 }, LiteralKind::Byte {
                terminated: __arg1_0 }) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            (LiteralKind::Str { terminated: __self_0 }, LiteralKind::Str {
                terminated: __arg1_0 }) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            (LiteralKind::ByteStr { terminated: __self_0 },
                LiteralKind::ByteStr { terminated: __arg1_0 }) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            (LiteralKind::CStr { terminated: __self_0 }, LiteralKind::CStr {
                terminated: __arg1_0 }) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            (LiteralKind::RawStr { n_hashes: __self_0 }, LiteralKind::RawStr {
                n_hashes: __arg1_0 }) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            (LiteralKind::RawByteStr { n_hashes: __self_0 },
                LiteralKind::RawByteStr { n_hashes: __arg1_0 }) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            (LiteralKind::RawCStr { n_hashes: __self_0 },
                LiteralKind::RawCStr { n_hashes: __arg1_0 }) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            _ =>
                ::core::cmp::PartialOrd::partial_cmp(&__self_discr,
                    &__arg1_discr),
        }
    }
}PartialOrd, #[automatically_derived]
impl ::core::cmp::Ord for LiteralKind {
    #[inline]
    fn cmp(&self, other: &LiteralKind) -> ::core::cmp::Ordering {
        let __self_discr = ::core::intrinsics::discriminant_value(self);
        let __arg1_discr = ::core::intrinsics::discriminant_value(other);
        match ::core::cmp::Ord::cmp(&__self_discr, &__arg1_discr) {
            ::core::cmp::Ordering::Equal =>
                match (self, other) {
                    (LiteralKind::Int { base: __self_0, empty_int: __self_1 },
                        LiteralKind::Int { base: __arg1_0, empty_int: __arg1_1 }) =>
                        match ::core::cmp::Ord::cmp(__self_0, __arg1_0) {
                            ::core::cmp::Ordering::Equal =>
                                ::core::cmp::Ord::cmp(__self_1, __arg1_1),
                            cmp => cmp,
                        },
                    (LiteralKind::Float {
                        base: __self_0, empty_exponent: __self_1 },
                        LiteralKind::Float {
                        base: __arg1_0, empty_exponent: __arg1_1 }) =>
                        match ::core::cmp::Ord::cmp(__self_0, __arg1_0) {
                            ::core::cmp::Ordering::Equal =>
                                ::core::cmp::Ord::cmp(__self_1, __arg1_1),
                            cmp => cmp,
                        },
                    (LiteralKind::Char { terminated: __self_0 },
                        LiteralKind::Char { terminated: __arg1_0 }) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    (LiteralKind::Byte { terminated: __self_0 },
                        LiteralKind::Byte { terminated: __arg1_0 }) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    (LiteralKind::Str { terminated: __self_0 },
                        LiteralKind::Str { terminated: __arg1_0 }) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    (LiteralKind::ByteStr { terminated: __self_0 },
                        LiteralKind::ByteStr { terminated: __arg1_0 }) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    (LiteralKind::CStr { terminated: __self_0 },
                        LiteralKind::CStr { terminated: __arg1_0 }) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    (LiteralKind::RawStr { n_hashes: __self_0 },
                        LiteralKind::RawStr { n_hashes: __arg1_0 }) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    (LiteralKind::RawByteStr { n_hashes: __self_0 },
                        LiteralKind::RawByteStr { n_hashes: __arg1_0 }) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    (LiteralKind::RawCStr { n_hashes: __self_0 },
                        LiteralKind::RawCStr { n_hashes: __arg1_0 }) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    _ => unsafe { ::core::intrinsics::unreachable() }
                },
            cmp => cmp,
        }
    }
}Ord)]
221pub enum LiteralKind {
222    /// `12_u8`, `0o100`, `0b120i99`, `1f32`.
223    Int { base: Base, empty_int: bool },
224    /// `12.34f32`, `1e3`, but not `1f32`.
225    Float { base: Base, empty_exponent: bool },
226    /// `'a'`, `'\\'`, `'''`, `';`
227    Char { terminated: bool },
228    /// `b'a'`, `b'\\'`, `b'''`, `b';`
229    Byte { terminated: bool },
230    /// `"abc"`, `"abc`
231    Str { terminated: bool },
232    /// `b"abc"`, `b"abc`
233    ByteStr { terminated: bool },
234    /// `c"abc"`, `c"abc`
235    CStr { terminated: bool },
236    /// `r"abc"`, `r#"abc"#`, `r####"ab"###"c"####`, `r#"a`. `None` indicates
237    /// an invalid literal.
238    RawStr { n_hashes: Option<u8> },
239    /// `br"abc"`, `br#"abc"#`, `br####"ab"###"c"####`, `br#"a`. `None`
240    /// indicates an invalid literal.
241    RawByteStr { n_hashes: Option<u8> },
242    /// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` indicates an invalid literal.
243    RawCStr { n_hashes: Option<u8> },
244}
245
/// `#"abc"#`, `##"a"` (fewer closing), or even `#"a` (unterminated).
///
/// Can capture fewer closing hashes than starting hashes,
/// for more efficient lexing and better backwards diagnostics.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct GuardedStr {
    /// Number of `#` guard characters.
    ///
    /// NOTE(review): presumably the count of starting hashes, given that fewer
    /// closing hashes may be captured — confirm against the lexing code.
    pub n_hashes: u32,
    /// Whether the string was terminated; `false` for a case such as `#"a`.
    pub terminated: bool,
    /// Length of the lexed token.
    pub token_len: u32,
}
256
/// Reason a raw string literal failed validation (see `validate_raw_str`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum RawStrError {
    /// Non `#` characters exist between `r` and `"`, e.g. `r##~"abcde"##`
    InvalidStarter { bad_char: char },
    /// The string was not terminated, e.g. `r###"abcde"##`.
    /// `possible_terminator_offset` is the number of characters after `r` or
    /// `br` where they may have intended to terminate it.
    NoTerminator { expected: u32, found: u32, possible_terminator_offset: Option<u32> },
    /// More than 255 `#`s exist.
    TooManyDelimiters { found: u32 },
}
268
/// Base of numeric literal encoding according to its prefix.
///
/// Each variant's discriminant is the numeric radix it denotes.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Base {
    /// Literal starts with "0b".
    Binary = 2,
    /// Literal starts with "0o".
    Octal = 8,
    /// Literal doesn't contain a prefix.
    Decimal = 10,
    /// Literal starts with "0x".
    Hexadecimal = 16,
}
281
282/// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun",
283/// but shebang isn't a part of rust syntax.
284pub fn strip_shebang(input: &str) -> Option<usize> {
285    // Shebang must start with `#!` literally, without any preceding whitespace.
286    // For simplicity we consider any line starting with `#!` a shebang,
287    // regardless of restrictions put on shebangs by specific platforms.
288    if let Some(input_tail) = input.strip_prefix("#!") {
289        // Ok, this is a shebang but if the next non-whitespace token is `[`,
290        // then it may be valid Rust code, so consider it Rust code.
291        let next_non_whitespace_token =
292            tokenize(input_tail, FrontmatterAllowed::No).map(|tok| tok.kind).find(|tok| {
293                !#[allow(non_exhaustive_omitted_patterns)] match tok {
    TokenKind::Whitespace | TokenKind::LineComment { doc_style: None } |
        TokenKind::BlockComment { doc_style: None, .. } => true,
    _ => false,
}matches!(
294                    tok,
295                    TokenKind::Whitespace
296                        | TokenKind::LineComment { doc_style: None }
297                        | TokenKind::BlockComment { doc_style: None, .. }
298                )
299            });
300        if next_non_whitespace_token != Some(TokenKind::OpenBracket) {
301            // No other choice than to consider this a shebang.
302            return Some(2 + input_tail.lines().next().unwrap_or_default().len());
303        }
304    }
305    None
306}
307
308/// Validates a raw string literal. Used for getting more information about a
309/// problem with a `RawStr`/`RawByteStr` with a `None` field.
310#[inline]
311pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError> {
312    if true {
    if !!input.is_empty() {
        ::core::panicking::panic("assertion failed: !input.is_empty()")
    };
};debug_assert!(!input.is_empty());
313    let mut cursor = Cursor::new(input, FrontmatterAllowed::No);
314    // Move past the leading `r` or `br`.
315    for _ in 0..prefix_len {
316        cursor.bump().unwrap();
317    }
318    cursor.raw_double_quoted_string(prefix_len).map(|_| ())
319}
320
321/// Creates an iterator that produces tokens from the input string.
322///
323/// When parsing a full Rust document,
324/// first [`strip_shebang`] and then allow frontmatters with [`FrontmatterAllowed::Yes`].
325///
326/// When tokenizing a slice of a document, be sure to disallow frontmatters with [`FrontmatterAllowed::No`]
327pub fn tokenize(
328    input: &str,
329    frontmatter_allowed: FrontmatterAllowed,
330) -> impl Iterator<Item = Token> {
331    let mut cursor = Cursor::new(input, frontmatter_allowed);
332    std::iter::from_fn(move || {
333        let token = cursor.advance_token();
334        if token.kind != TokenKind::Eof { Some(token) } else { None }
335    })
336}
337
/// True if `c` is considered a whitespace according to Rust language definition.
/// See [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html)
/// for definitions of these classes.
pub fn is_whitespace(c: char) -> bool {
    // This is Pattern_White_Space.
    //
    // Note that this set is stable (ie, it doesn't change with different
    // Unicode versions), so it's ok to just hard-code the values.

    matches!(
        c,
        // End-of-line characters
        '\u{000A}' // line feed (\n)
        | '\u{000B}' // vertical tab
        | '\u{000C}' // form feed
        | '\u{000D}' // carriage return (\r)
        | '\u{0085}' // next line (from latin1)
        | '\u{2028}' // LINE SEPARATOR
        | '\u{2029}' // PARAGRAPH SEPARATOR

        // `Default_Ignorable_Code_Point` characters
        | '\u{200E}' // LEFT-TO-RIGHT MARK
        | '\u{200F}' // RIGHT-TO-LEFT MARK

        // Horizontal space characters
        | '\u{0009}' // tab (\t)
        | '\u{0020}' // space
    )
}
367
/// True if `c` is considered horizontal whitespace according to Rust language definition.
pub fn is_horizontal_whitespace(c: char) -> bool {
    // This is the horizontal space subset of `Pattern_White_Space` as
    // categorized by UAX #31, Section 4.1.
    //
    // Note that this set is stable (ie, it doesn't change with different
    // Unicode versions), so it's ok to just hard-code the values.

    matches!(
        c,
        // Horizontal space characters
        '\u{0009}'   // tab (\t)
        | '\u{0020}' // space
    )
}
383
384/// True if `c` is valid as a first character of an identifier.
385/// See [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html) for
386/// a formal definition of valid identifier name.
387pub fn is_id_start(c: char) -> bool {
388    // This is XID_Start OR '_' (which formally is not a XID_Start).
389    c == '_' || unicode_ident::is_xid_start(c)
390}
391
392/// True if `c` is valid as a non-first character of an identifier.
393/// See [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html) for
394/// a formal definition of valid identifier name.
395pub fn is_id_continue(c: char) -> bool {
396    unicode_ident::is_xid_continue(c)
397}
398
399/// The passed string is lexically an identifier.
400pub fn is_ident(string: &str) -> bool {
401    let mut chars = string.chars();
402    if let Some(start) = chars.next() {
403        is_id_start(start) && chars.all(is_id_continue)
404    } else {
405        false
406    }
407}
408
/// Whether the lexer may recognize a frontmatter block (a `---` fence) at the
/// position it starts lexing from.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FrontmatterAllowed {
    /// Frontmatter may be lexed, until the first non-whitespace token is produced.
    Yes,
    /// Frontmatter is never lexed; a leading `-` is an ordinary token.
    No,
}
413
414/// Peekable iterator over a char sequence.
415///
416/// Next characters can be peeked via `first` method,
417/// and position can be shifted forward via `bump` method.
418pub struct Cursor<'a> {
419    len_remaining: usize,
420    /// Iterator over chars. Slightly faster than a &str.
421    chars: Chars<'a>,
422    pub(crate) frontmatter_allowed: FrontmatterAllowed,
423    #[cfg(debug_assertions)]
424    prev: char,
425}
426
/// Sentinel returned by the peeking methods when no character is available.
/// A NUL can also occur in real input, so use `is_eof` to detect the true end.
const EOF_CHAR: char = '\0';
428
429impl<'a> Cursor<'a> {
430    pub fn new(input: &'a str, frontmatter_allowed: FrontmatterAllowed) -> Cursor<'a> {
431        Cursor {
432            len_remaining: input.len(),
433            chars: input.chars(),
434            frontmatter_allowed,
435            #[cfg(debug_assertions)]
436            prev: EOF_CHAR,
437        }
438    }
439
440    pub fn as_str(&self) -> &'a str {
441        self.chars.as_str()
442    }
443
444    /// Returns the last eaten symbol (or `'\0'` in release builds).
445    /// (For debug assertions only.)
446    pub(crate) fn prev(&self) -> char {
447        #[cfg(debug_assertions)]
448        {
449            self.prev
450        }
451
452        #[cfg(not(debug_assertions))]
453        {
454            EOF_CHAR
455        }
456    }
457
458    /// Peeks the next symbol from the input stream without consuming it.
459    /// If requested position doesn't exist, `EOF_CHAR` is returned.
460    /// However, getting `EOF_CHAR` doesn't always mean actual end of file,
461    /// it should be checked with `is_eof` method.
462    pub fn first(&self) -> char {
463        // `.next()` optimizes better than `.nth(0)`
464        self.chars.clone().next().unwrap_or(EOF_CHAR)
465    }
466
467    /// Peeks the second symbol from the input stream without consuming it.
468    pub(crate) fn second(&self) -> char {
469        // `.next()` optimizes better than `.nth(1)`
470        let mut iter = self.chars.clone();
471        iter.next();
472        iter.next().unwrap_or(EOF_CHAR)
473    }
474
475    /// Peeks the third symbol from the input stream without consuming it.
476    pub fn third(&self) -> char {
477        // `.next()` optimizes better than `.nth(2)`
478        let mut iter = self.chars.clone();
479        iter.next();
480        iter.next();
481        iter.next().unwrap_or(EOF_CHAR)
482    }
483
484    /// Checks if there is nothing more to consume.
485    pub(crate) fn is_eof(&self) -> bool {
486        self.chars.as_str().is_empty()
487    }
488
489    /// Returns amount of already consumed symbols.
490    pub(crate) fn pos_within_token(&self) -> u32 {
491        (self.len_remaining - self.chars.as_str().len()) as u32
492    }
493
494    /// Resets the number of bytes consumed to 0.
495    pub(crate) fn reset_pos_within_token(&mut self) {
496        self.len_remaining = self.chars.as_str().len();
497    }
498
499    /// Moves to the next character.
500    pub(crate) fn bump(&mut self) -> Option<char> {
501        let c = self.chars.next()?;
502
503        #[cfg(debug_assertions)]
504        {
505            self.prev = c;
506        }
507
508        Some(c)
509    }
510
511    /// Moves to a substring by a number of bytes.
512    pub(crate) fn bump_bytes(&mut self, n: usize) {
513        self.chars = self.as_str()[n..].chars();
514    }
515
516    /// Eats symbols while predicate returns true or until the end of file is reached.
517    pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
518        // It was tried making optimized version of this for eg. line comments, but
519        // LLVM can inline all of this and compile it down to fast iteration over bytes.
520        while predicate(self.first()) && !self.is_eof() {
521            self.bump();
522        }
523    }
524
525    pub(crate) fn eat_until(&mut self, byte: u8) {
526        self.chars = match memchr::memchr(byte, self.as_str().as_bytes()) {
527            Some(index) => self.as_str()[index..].chars(),
528            None => "".chars(),
529        }
530    }
531
532    /// Parses a token from the input string.
533    pub fn advance_token(&mut self) -> Token {
534        let Some(first_char) = self.bump() else {
535            return Token::new(TokenKind::Eof, 0);
536        };
537
538        let token_kind = match first_char {
539            c if #[allow(non_exhaustive_omitted_patterns)] match self.frontmatter_allowed {
    FrontmatterAllowed::Yes => true,
    _ => false,
}matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
540                && is_whitespace(c) =>
541            {
542                let mut last = first_char;
543                while is_whitespace(self.first()) {
544                    let Some(c) = self.bump() else {
545                        break;
546                    };
547                    last = c;
548                }
549                // invalid frontmatter opening as whitespace preceding it isn't newline.
550                // combine the whitespace and the frontmatter to a single token as we shall
551                // error later.
552                if last != '\n' && self.as_str().starts_with("---") {
553                    self.bump();
554                    self.frontmatter(true)
555                } else {
556                    Whitespace
557                }
558            }
559            '-' if #[allow(non_exhaustive_omitted_patterns)] match self.frontmatter_allowed {
    FrontmatterAllowed::Yes => true,
    _ => false,
}matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
560                && self.as_str().starts_with("--") =>
561            {
562                // happy path
563                self.frontmatter(false)
564            }
565            // Slash, comment or block comment.
566            '/' => match self.first() {
567                '/' => self.line_comment(),
568                '*' => self.block_comment(),
569                _ => Slash,
570            },
571
572            // Whitespace sequence.
573            c if is_whitespace(c) => self.whitespace(),
574
575            // Raw identifier, raw string literal or identifier.
576            'r' => match (self.first(), self.second()) {
577                ('#', c1) if is_id_start(c1) => self.raw_ident(),
578                ('#', _) | ('"', _) => {
579                    let res = self.raw_double_quoted_string(1);
580                    let suffix_start = self.pos_within_token();
581                    if res.is_ok() {
582                        self.eat_literal_suffix();
583                    }
584                    let kind = RawStr { n_hashes: res.ok() };
585                    Literal { kind, suffix_start }
586                }
587                _ => self.ident_or_unknown_prefix(),
588            },
589
590            // Byte literal, byte string literal, raw byte string literal or identifier.
591            'b' => self.c_or_byte_string(
592                |terminated| ByteStr { terminated },
593                |n_hashes| RawByteStr { n_hashes },
594                Some(|terminated| Byte { terminated }),
595            ),
596
597            // c-string literal, raw c-string literal or identifier.
598            'c' => self.c_or_byte_string(
599                |terminated| CStr { terminated },
600                |n_hashes| RawCStr { n_hashes },
601                None,
602            ),
603
604            // Identifier (this should be checked after other variant that can
605            // start as identifier).
606            c if is_id_start(c) => self.ident_or_unknown_prefix(),
607
608            // Numeric literal.
609            c @ '0'..='9' => {
610                let literal_kind = self.number(c);
611                let suffix_start = self.pos_within_token();
612                self.eat_literal_suffix();
613                TokenKind::Literal { kind: literal_kind, suffix_start }
614            }
615
616            // Guarded string literal prefix: `#"` or `##`
617            '#' if #[allow(non_exhaustive_omitted_patterns)] match self.first() {
    '"' | '#' => true,
    _ => false,
}matches!(self.first(), '"' | '#') => {
618                self.bump();
619                TokenKind::GuardedStrPrefix
620            }
621
622            // One-symbol tokens.
623            ';' => Semi,
624            ',' => Comma,
625            '.' => Dot,
626            '(' => OpenParen,
627            ')' => CloseParen,
628            '{' => OpenBrace,
629            '}' => CloseBrace,
630            '[' => OpenBracket,
631            ']' => CloseBracket,
632            '@' => At,
633            '#' => Pound,
634            '~' => Tilde,
635            '?' => Question,
636            ':' => Colon,
637            '$' => Dollar,
638            '=' => Eq,
639            '!' => Bang,
640            '<' => Lt,
641            '>' => Gt,
642            '-' => Minus,
643            '&' => And,
644            '|' => Or,
645            '+' => Plus,
646            '*' => Star,
647            '^' => Caret,
648            '%' => Percent,
649
650            // Lifetime or character literal.
651            '\'' => self.lifetime_or_char(),
652
653            // String literal.
654            '"' => {
655                let terminated = self.double_quoted_string();
656                let suffix_start = self.pos_within_token();
657                if terminated {
658                    self.eat_literal_suffix();
659                }
660                let kind = Str { terminated };
661                Literal { kind, suffix_start }
662            }
663            // Identifier starting with an emoji. Only lexed for graceful error recovery.
664            c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
665            _ => Unknown,
666        };
667        if #[allow(non_exhaustive_omitted_patterns)] match self.frontmatter_allowed {
    FrontmatterAllowed::Yes => true,
    _ => false,
}matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
668            && !#[allow(non_exhaustive_omitted_patterns)] match token_kind {
    Whitespace => true,
    _ => false,
}matches!(token_kind, Whitespace)
669        {
670            // stop allowing frontmatters after first non-whitespace token
671            self.frontmatter_allowed = FrontmatterAllowed::No;
672        }
673        let res = Token::new(token_kind, self.pos_within_token());
674        self.reset_pos_within_token();
675        res
676    }
677
678    /// Given that one `-` was eaten, eat the rest of the frontmatter.
679    fn frontmatter(&mut self, has_invalid_preceding_whitespace: bool) -> TokenKind {
680        if true {
    match (&'-', &self.prev()) {
        (left_val, right_val) => {
            if !(*left_val == *right_val) {
                let kind = ::core::panicking::AssertKind::Eq;
                ::core::panicking::assert_failed(kind, &*left_val,
                    &*right_val, ::core::option::Option::None);
            }
        }
    };
};debug_assert_eq!('-', self.prev());
681
682        let pos = self.pos_within_token();
683        self.eat_while(|c| c == '-');
684
685        // one `-` is eaten by the caller.
686        let length_opening = self.pos_within_token() - pos + 1;
687
688        // must be ensured by the caller
689        if true {
    if !(length_opening >= 3) {
        ::core::panicking::panic("assertion failed: length_opening >= 3")
    };
};debug_assert!(length_opening >= 3);
690
691        // whitespace between the opening and the infostring.
692        self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch));
693
694        // copied from `eat_identifier`, but allows `-` and `.` in infostring to allow something like
695        // `---Cargo.toml` as a valid opener
696        if is_id_start(self.first()) {
697            self.bump();
698            self.eat_while(|c| is_id_continue(c) || c == '-' || c == '.');
699        }
700
701        self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch));
702        let invalid_infostring = self.first() != '\n';
703
704        let mut found = false;
705        let nl_fence_pattern = ::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("\n{0:-<1$}", "",
                length_opening as usize))
    })format!("\n{:-<1$}", "", length_opening as usize);
706        if let Some(closing) = self.as_str().find(&nl_fence_pattern) {
707            // candidate found
708            self.bump_bytes(closing + nl_fence_pattern.len());
709            // in case like
710            // ---cargo
711            // --- blahblah
712            // or
713            // ---cargo
714            // ----
715            // combine those stuff into this frontmatter token such that it gets detected later.
716            self.eat_until(b'\n');
717            found = true;
718        }
719
720        if !found {
721            // recovery strategy: a closing statement might have preceding whitespace/newline
722            // but not have enough dashes to properly close. In this case, we eat until there,
723            // and report a mismatch in the parser.
724            let mut rest = self.as_str();
725            // We can look for a shorter closing (starting with four dashes but closing with three)
726            // and other indications that Rust has started and the infostring has ended.
727            let mut potential_closing = rest
728                .find("\n---")
729                // n.b. only in the case where there are dashes, we move the index to the line where
730                // the dashes start as we eat to include that line. For other cases those are Rust code
731                // and not included in the frontmatter.
732                .map(|x| x + 1)
733                .or_else(|| rest.find("\nuse "))
734                .or_else(|| rest.find("\n//!"))
735                .or_else(|| rest.find("\n#!["));
736
737            if potential_closing.is_none() {
738                // a less fortunate recovery if all else fails which finds any dashes preceded by whitespace
739                // on a standalone line. Might be wrong.
740                let mut base_index = 0;
741                while let Some(closing) = rest.find("---") {
742                    let preceding_chars_start = rest[..closing].rfind("\n").map_or(0, |i| i + 1);
743                    if rest[preceding_chars_start..closing].chars().all(is_horizontal_whitespace) {
744                        // candidate found
745                        potential_closing = Some(closing + base_index);
746                        break;
747                    } else {
748                        rest = &rest[closing + 3..];
749                        base_index += closing + 3;
750                    }
751                }
752            }
753
754            if let Some(potential_closing) = potential_closing {
755                // bump to the potential closing, and eat everything on that line.
756                self.bump_bytes(potential_closing);
757                self.eat_until(b'\n');
758            } else {
759                // eat everything. this will get reported as an unclosed frontmatter.
760                self.eat_while(|_| true);
761            }
762        }
763
764        Frontmatter { has_invalid_preceding_whitespace, invalid_infostring }
765    }
766
767    fn line_comment(&mut self) -> TokenKind {
768        if true {
    if !(self.prev() == '/' && self.first() == '/') {
        ::core::panicking::panic("assertion failed: self.prev() == \'/\' && self.first() == \'/\'")
    };
};debug_assert!(self.prev() == '/' && self.first() == '/');
769        self.bump();
770
771        let doc_style = match self.first() {
772            // `//!` is an inner line doc comment.
773            '!' => Some(DocStyle::Inner),
774            // `////` (more than 3 slashes) is not considered a doc comment.
775            '/' if self.second() != '/' => Some(DocStyle::Outer),
776            _ => None,
777        };
778
779        self.eat_until(b'\n');
780        LineComment { doc_style }
781    }
782
783    fn block_comment(&mut self) -> TokenKind {
784        if true {
    if !(self.prev() == '/' && self.first() == '*') {
        ::core::panicking::panic("assertion failed: self.prev() == \'/\' && self.first() == \'*\'")
    };
};debug_assert!(self.prev() == '/' && self.first() == '*');
785        self.bump();
786
787        let doc_style = match self.first() {
788            // `/*!` is an inner block doc comment.
789            '!' => Some(DocStyle::Inner),
790            // `/***` (more than 2 stars) is not considered a doc comment.
791            // `/**/` is not considered a doc comment.
792            '*' if !#[allow(non_exhaustive_omitted_patterns)] match self.second() {
    '*' | '/' => true,
    _ => false,
}matches!(self.second(), '*' | '/') => Some(DocStyle::Outer),
793            _ => None,
794        };
795
796        let mut depth = 1usize;
797        while let Some(c) = self.bump() {
798            match c {
799                '/' if self.first() == '*' => {
800                    self.bump();
801                    depth += 1;
802                }
803                '*' if self.first() == '/' => {
804                    self.bump();
805                    depth -= 1;
806                    if depth == 0 {
807                        // This block comment is closed, so for a construction like "/* */ */"
808                        // there will be a successfully parsed block comment "/* */"
809                        // and " */" will be processed separately.
810                        break;
811                    }
812                }
813                _ => (),
814            }
815        }
816
817        BlockComment { doc_style, terminated: depth == 0 }
818    }
819
820    fn whitespace(&mut self) -> TokenKind {
821        if true {
    if !is_whitespace(self.prev()) {
        ::core::panicking::panic("assertion failed: is_whitespace(self.prev())")
    };
};debug_assert!(is_whitespace(self.prev()));
822        self.eat_while(is_whitespace);
823        Whitespace
824    }
825
826    fn raw_ident(&mut self) -> TokenKind {
827        if true {
    if !(self.prev() == 'r' && self.first() == '#' &&
                is_id_start(self.second())) {
        ::core::panicking::panic("assertion failed: self.prev() == \'r\' && self.first() == \'#\' && is_id_start(self.second())")
    };
};debug_assert!(self.prev() == 'r' && self.first() == '#' && is_id_start(self.second()));
828        // Eat "#" symbol.
829        self.bump();
830        // Eat the identifier part of RawIdent.
831        self.eat_identifier();
832        RawIdent
833    }
834
835    fn ident_or_unknown_prefix(&mut self) -> TokenKind {
836        if true {
    if !is_id_start(self.prev()) {
        ::core::panicking::panic("assertion failed: is_id_start(self.prev())")
    };
};debug_assert!(is_id_start(self.prev()));
837        // Start is already eaten, eat the rest of identifier.
838        self.eat_while(is_id_continue);
839        // Known prefixes must have been handled earlier. So if
840        // we see a prefix here, it is definitely an unknown prefix.
841        match self.first() {
842            '#' | '"' | '\'' => UnknownPrefix,
843            c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
844            _ => Ident,
845        }
846    }
847
848    fn invalid_ident(&mut self) -> TokenKind {
849        // Start is already eaten, eat the rest of identifier.
850        self.eat_while(|c| {
851            const ZERO_WIDTH_JOINER: char = '\u{200d}';
852            is_id_continue(c) || (!c.is_ascii() && c.is_emoji_char()) || c == ZERO_WIDTH_JOINER
853        });
854        // An invalid identifier followed by '#' or '"' or '\'' could be
855        // interpreted as an invalid literal prefix. We don't bother doing that
856        // because the treatment of invalid identifiers and invalid prefixes
857        // would be the same.
858        InvalidIdent
859    }
860
861    fn c_or_byte_string(
862        &mut self,
863        mk_kind: fn(bool) -> LiteralKind,
864        mk_kind_raw: fn(Option<u8>) -> LiteralKind,
865        single_quoted: Option<fn(bool) -> LiteralKind>,
866    ) -> TokenKind {
867        match (self.first(), self.second(), single_quoted) {
868            ('\'', _, Some(single_quoted)) => {
869                self.bump();
870                let terminated = self.single_quoted_string();
871                let suffix_start = self.pos_within_token();
872                if terminated {
873                    self.eat_literal_suffix();
874                }
875                let kind = single_quoted(terminated);
876                Literal { kind, suffix_start }
877            }
878            ('"', _, _) => {
879                self.bump();
880                let terminated = self.double_quoted_string();
881                let suffix_start = self.pos_within_token();
882                if terminated {
883                    self.eat_literal_suffix();
884                }
885                let kind = mk_kind(terminated);
886                Literal { kind, suffix_start }
887            }
888            ('r', '"', _) | ('r', '#', _) => {
889                self.bump();
890                let res = self.raw_double_quoted_string(2);
891                let suffix_start = self.pos_within_token();
892                if res.is_ok() {
893                    self.eat_literal_suffix();
894                }
895                let kind = mk_kind_raw(res.ok());
896                Literal { kind, suffix_start }
897            }
898            _ => self.ident_or_unknown_prefix(),
899        }
900    }
901
902    fn number(&mut self, first_digit: char) -> LiteralKind {
903        if true {
    if !('0' <= self.prev() && self.prev() <= '9') {
        ::core::panicking::panic("assertion failed: \'0\' <= self.prev() && self.prev() <= \'9\'")
    };
};debug_assert!('0' <= self.prev() && self.prev() <= '9');
904        let mut base = Base::Decimal;
905        if first_digit == '0' {
906            // Attempt to parse encoding base.
907            match self.first() {
908                'b' => {
909                    base = Base::Binary;
910                    self.bump();
911                    if !self.eat_decimal_digits() {
912                        return Int { base, empty_int: true };
913                    }
914                }
915                'o' => {
916                    base = Base::Octal;
917                    self.bump();
918                    if !self.eat_decimal_digits() {
919                        return Int { base, empty_int: true };
920                    }
921                }
922                'x' => {
923                    base = Base::Hexadecimal;
924                    self.bump();
925                    if !self.eat_hexadecimal_digits() {
926                        return Int { base, empty_int: true };
927                    }
928                }
929                // Not a base prefix; consume additional digits.
930                '0'..='9' | '_' => {
931                    self.eat_decimal_digits();
932                }
933
934                // Also not a base prefix; nothing more to do here.
935                '.' | 'e' | 'E' => {}
936
937                // Just a 0.
938                _ => return Int { base, empty_int: false },
939            }
940        } else {
941            // No base prefix, parse number in the usual way.
942            self.eat_decimal_digits();
943        }
944
945        match self.first() {
946            // Don't be greedy if this is actually an
947            // integer literal followed by field/method access or a range pattern
948            // (`0..2` and `12.foo()`)
949            '.' if self.second() != '.' && !is_id_start(self.second()) => {
950                // might have stuff after the ., and if it does, it needs to start
951                // with a number
952                self.bump();
953                let mut empty_exponent = false;
954                if self.first().is_ascii_digit() {
955                    self.eat_decimal_digits();
956                    match self.first() {
957                        'e' | 'E' => {
958                            self.bump();
959                            empty_exponent = !self.eat_float_exponent();
960                        }
961                        _ => (),
962                    }
963                }
964                Float { base, empty_exponent }
965            }
966            'e' | 'E' => {
967                self.bump();
968                let empty_exponent = !self.eat_float_exponent();
969                Float { base, empty_exponent }
970            }
971            _ => Int { base, empty_int: false },
972        }
973    }
974
975    fn lifetime_or_char(&mut self) -> TokenKind {
976        if true {
    if !(self.prev() == '\'') {
        ::core::panicking::panic("assertion failed: self.prev() == \'\\\'\'")
    };
};debug_assert!(self.prev() == '\'');
977
978        let can_be_a_lifetime = if self.second() == '\'' {
979            // It's surely not a lifetime.
980            false
981        } else {
982            // If the first symbol is valid for identifier, it can be a lifetime.
983            // Also check if it's a number for a better error reporting (so '0 will
984            // be reported as invalid lifetime and not as unterminated char literal).
985            is_id_start(self.first()) || self.first().is_ascii_digit()
986        };
987
988        if !can_be_a_lifetime {
989            let terminated = self.single_quoted_string();
990            let suffix_start = self.pos_within_token();
991            if terminated {
992                self.eat_literal_suffix();
993            }
994            let kind = Char { terminated };
995            return Literal { kind, suffix_start };
996        }
997
998        if self.first() == 'r' && self.second() == '#' && is_id_start(self.third()) {
999            // Eat "r" and `#`, and identifier start characters.
1000            self.bump();
1001            self.bump();
1002            self.bump();
1003            self.eat_while(is_id_continue);
1004            return RawLifetime;
1005        }
1006
1007        // Either a lifetime or a character literal with
1008        // length greater than 1.
1009        let starts_with_number = self.first().is_ascii_digit();
1010
1011        // Skip the literal contents.
1012        // First symbol can be a number (which isn't a valid identifier start),
1013        // so skip it without any checks.
1014        self.bump();
1015        self.eat_while(is_id_continue);
1016
1017        match self.first() {
1018            // Check if after skipping literal contents we've met a closing
1019            // single quote (which means that user attempted to create a
1020            // string with single quotes).
1021            '\'' => {
1022                self.bump();
1023                let kind = Char { terminated: true };
1024                Literal { kind, suffix_start: self.pos_within_token() }
1025            }
1026            '#' if !starts_with_number => UnknownPrefixLifetime,
1027            _ => Lifetime { starts_with_number },
1028        }
1029    }
1030
1031    fn single_quoted_string(&mut self) -> bool {
1032        if true {
    if !(self.prev() == '\'') {
        ::core::panicking::panic("assertion failed: self.prev() == \'\\\'\'")
    };
};debug_assert!(self.prev() == '\'');
1033        // Check if it's a one-symbol literal.
1034        if self.second() == '\'' && self.first() != '\\' {
1035            self.bump();
1036            self.bump();
1037            return true;
1038        }
1039
1040        // Literal has more than one symbol.
1041
1042        // Parse until either quotes are terminated or error is detected.
1043        loop {
1044            match self.first() {
1045                // Quotes are terminated, finish parsing.
1046                '\'' => {
1047                    self.bump();
1048                    return true;
1049                }
1050                // Probably beginning of the comment, which we don't want to include
1051                // to the error report.
1052                '/' => break,
1053                // Newline without following '\'' means unclosed quote, stop parsing.
1054                '\n' if self.second() != '\'' => break,
1055                // End of file, stop parsing.
1056                EOF_CHAR if self.is_eof() => break,
1057                // Escaped slash is considered one character, so bump twice.
1058                '\\' => {
1059                    self.bump();
1060                    self.bump();
1061                }
1062                // Skip the character.
1063                _ => {
1064                    self.bump();
1065                }
1066            }
1067        }
1068        // String was not terminated.
1069        false
1070    }
1071
1072    /// Eats double-quoted string and returns true
1073    /// if string is terminated.
1074    fn double_quoted_string(&mut self) -> bool {
1075        if true {
    if !(self.prev() == '"') {
        ::core::panicking::panic("assertion failed: self.prev() == \'\"\'")
    };
};debug_assert!(self.prev() == '"');
1076        while let Some(c) = self.bump() {
1077            match c {
1078                '"' => {
1079                    return true;
1080                }
1081                '\\' if self.first() == '\\' || self.first() == '"' => {
1082                    // Bump again to skip escaped character.
1083                    self.bump();
1084                }
1085                _ => (),
1086            }
1087        }
1088        // End of file reached.
1089        false
1090    }
1091
1092    /// Attempt to lex for a guarded string literal.
1093    ///
1094    /// Used by `rustc_parse::lexer` to lex for guarded strings
1095    /// conditionally based on edition.
1096    ///
1097    /// Note: this will not reset the `Cursor` when a
1098    /// guarded string is not found. It is the caller's
1099    /// responsibility to do so.
1100    pub fn guarded_double_quoted_string(&mut self) -> Option<GuardedStr> {
1101        if true {
    if !(self.prev() != '#') {
        ::core::panicking::panic("assertion failed: self.prev() != \'#\'")
    };
};debug_assert!(self.prev() != '#');
1102
1103        let mut n_start_hashes: u32 = 0;
1104        while self.first() == '#' {
1105            n_start_hashes += 1;
1106            self.bump();
1107        }
1108
1109        if self.first() != '"' {
1110            return None;
1111        }
1112        self.bump();
1113        if true {
    if !(self.prev() == '"') {
        ::core::panicking::panic("assertion failed: self.prev() == \'\"\'")
    };
};debug_assert!(self.prev() == '"');
1114
1115        // Lex the string itself as a normal string literal
1116        // so we can recover that for older editions later.
1117        let terminated = self.double_quoted_string();
1118        if !terminated {
1119            let token_len = self.pos_within_token();
1120            self.reset_pos_within_token();
1121
1122            return Some(GuardedStr { n_hashes: n_start_hashes, terminated: false, token_len });
1123        }
1124
1125        // Consume closing '#' symbols.
1126        // Note that this will not consume extra trailing `#` characters:
1127        // `###"abcde"####` is lexed as a `GuardedStr { n_end_hashes: 3, .. }`
1128        // followed by a `#` token.
1129        let mut n_end_hashes = 0;
1130        while self.first() == '#' && n_end_hashes < n_start_hashes {
1131            n_end_hashes += 1;
1132            self.bump();
1133        }
1134
1135        // Reserved syntax, always an error, so it doesn't matter if
1136        // `n_start_hashes != n_end_hashes`.
1137
1138        self.eat_literal_suffix();
1139
1140        let token_len = self.pos_within_token();
1141        self.reset_pos_within_token();
1142
1143        Some(GuardedStr { n_hashes: n_start_hashes, terminated: true, token_len })
1144    }
1145
1146    /// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
1147    fn raw_double_quoted_string(&mut self, prefix_len: u32) -> Result<u8, RawStrError> {
1148        // Wrap the actual function to handle the error with too many hashes.
1149        // This way, it eats the whole raw string.
1150        let n_hashes = self.raw_string_unvalidated(prefix_len)?;
1151        // Only up to 255 `#`s are allowed in raw strings
1152        match u8::try_from(n_hashes) {
1153            Ok(num) => Ok(num),
1154            Err(_) => Err(RawStrError::TooManyDelimiters { found: n_hashes }),
1155        }
1156    }
1157
1158    fn raw_string_unvalidated(&mut self, prefix_len: u32) -> Result<u32, RawStrError> {
1159        if true {
    if !(self.prev() == 'r') {
        ::core::panicking::panic("assertion failed: self.prev() == \'r\'")
    };
};debug_assert!(self.prev() == 'r');
1160        let start_pos = self.pos_within_token();
1161        let mut possible_terminator_offset = None;
1162        let mut max_hashes = 0;
1163
1164        // Count opening '#' symbols.
1165        let mut eaten = 0;
1166        while self.first() == '#' {
1167            eaten += 1;
1168            self.bump();
1169        }
1170        let n_start_hashes = eaten;
1171
1172        // Check that string is started.
1173        match self.bump() {
1174            Some('"') => (),
1175            c => {
1176                let c = c.unwrap_or(EOF_CHAR);
1177                return Err(RawStrError::InvalidStarter { bad_char: c });
1178            }
1179        }
1180
1181        // Skip the string contents and on each '#' character met, check if this is
1182        // a raw string termination.
1183        loop {
1184            self.eat_until(b'"');
1185
1186            if self.is_eof() {
1187                return Err(RawStrError::NoTerminator {
1188                    expected: n_start_hashes,
1189                    found: max_hashes,
1190                    possible_terminator_offset,
1191                });
1192            }
1193
1194            // Eat closing double quote.
1195            self.bump();
1196
1197            // Check that amount of closing '#' symbols
1198            // is equal to the amount of opening ones.
1199            // Note that this will not consume extra trailing `#` characters:
1200            // `r###"abcde"####` is lexed as a `RawStr { n_hashes: 3 }`
1201            // followed by a `#` token.
1202            let mut n_end_hashes = 0;
1203            while self.first() == '#' && n_end_hashes < n_start_hashes {
1204                n_end_hashes += 1;
1205                self.bump();
1206            }
1207
1208            if n_end_hashes == n_start_hashes {
1209                return Ok(n_start_hashes);
1210            } else if n_end_hashes > max_hashes {
1211                // Keep track of possible terminators to give a hint about
1212                // where there might be a missing terminator
1213                possible_terminator_offset =
1214                    Some(self.pos_within_token() - start_pos - n_end_hashes + prefix_len);
1215                max_hashes = n_end_hashes;
1216            }
1217        }
1218    }
1219
1220    fn eat_decimal_digits(&mut self) -> bool {
1221        let mut has_digits = false;
1222        loop {
1223            match self.first() {
1224                '_' => {
1225                    self.bump();
1226                }
1227                '0'..='9' => {
1228                    has_digits = true;
1229                    self.bump();
1230                }
1231                _ => break,
1232            }
1233        }
1234        has_digits
1235    }
1236
1237    fn eat_hexadecimal_digits(&mut self) -> bool {
1238        let mut has_digits = false;
1239        loop {
1240            match self.first() {
1241                '_' => {
1242                    self.bump();
1243                }
1244                '0'..='9' | 'a'..='f' | 'A'..='F' => {
1245                    has_digits = true;
1246                    self.bump();
1247                }
1248                _ => break,
1249            }
1250        }
1251        has_digits
1252    }
1253
1254    /// Eats the float exponent. Returns true if at least one digit was met,
1255    /// and returns false otherwise.
1256    fn eat_float_exponent(&mut self) -> bool {
1257        if true {
    if !(self.prev() == 'e' || self.prev() == 'E') {
        ::core::panicking::panic("assertion failed: self.prev() == \'e\' || self.prev() == \'E\'")
    };
};debug_assert!(self.prev() == 'e' || self.prev() == 'E');
1258        if self.first() == '-' || self.first() == '+' {
1259            self.bump();
1260        }
1261        self.eat_decimal_digits()
1262    }
1263
1264    // Eats the suffix of the literal, e.g. "u8".
1265    fn eat_literal_suffix(&mut self) {
1266        self.eat_identifier();
1267    }
1268
1269    // Eats the identifier. Note: succeeds on `_`, which isn't a valid
1270    // identifier.
1271    fn eat_identifier(&mut self) {
1272        if !is_id_start(self.first()) {
1273            return;
1274        }
1275        self.bump();
1276
1277        self.eat_while(is_id_continue);
1278    }
1279}