// rustc_lexer/lib.rs

//! Low-level Rust lexer.
//!
//! The idea with `rustc_lexer` is to make a reusable library,
//! by separating out pure lexing and rustc-specific concerns, like spans,
//! error reporting, and interning. So, rustc_lexer operates directly on `&str`,
//! produces simple tokens which are a pair of type-tag and a bit of original text,
//! and does not report errors, instead storing them as flags on the token.
//!
//! Tokens produced by this lexer are not yet ready for parsing the Rust syntax.
//! For that see [`rustc_parse::lexer`], which converts this basic token stream
//! into wide tokens used by the actual parser.
//!
//! The purpose of this crate is to convert raw sources into a labeled sequence
//! of well-known token types, so building an actual Rust token stream will
//! be easier.
//!
//! The main entity of this crate is the [`TokenKind`] enum which represents common
//! lexeme types.
//!
//! [`rustc_parse::lexer`]: ../rustc_parse/lexer/index.html

22// tidy-alphabetical-start
23// We want to be able to build this crate with a stable compiler,
24// so no `#![feature]` attributes should be added.
25#![deny(unstable_features)]
26// tidy-alphabetical-end
27
28#[cfg(test)]
29mod tests;
30
31use std::str::Chars;
32
33use LiteralKind::*;
34use TokenKind::*;
35pub use unicode_ident::UNICODE_VERSION;
36use unicode_properties::UnicodeEmoji;
37
38// Make sure that the Unicode version of the dependencies is the same.
39const _: () = {
40    let properties = unicode_properties::UNICODE_VERSION;
41    let ident = unicode_ident::UNICODE_VERSION;
42
43    if properties.0 != ident.0 as u64
44        || properties.1 != ident.1 as u64
45        || properties.2 != ident.2 as u64
46    {
47        {
    ::core::panicking::panic_fmt(format_args!("unicode-properties and unicode-ident must use the same Unicode version, `unicode_properties::UNICODE_VERSION` and `unicode_ident::UNICODE_VERSION` are different."));
};panic!(
48            "unicode-properties and unicode-ident must use the same Unicode version, \
49            `unicode_properties::UNICODE_VERSION` and `unicode_ident::UNICODE_VERSION` are \
50            different."
51        );
52    }
53};
54
55/// Parsed token.
56/// It doesn't contain information about data that has been parsed,
57/// only the type of the token and its size.
58#[derive(#[automatically_derived]
impl ::core::fmt::Debug for Token {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        ::core::fmt::Formatter::debug_struct_field2_finish(f, "Token", "kind",
            &self.kind, "len", &&self.len)
    }
}Debug)]
59pub struct Token {
60    pub kind: TokenKind,
61    pub len: u32,
62}
63
64impl Token {
65    fn new(kind: TokenKind, len: u32) -> Token {
66        Token { kind, len }
67    }
68}
69
70/// Enum representing common lexeme types.
71#[derive(#[automatically_derived]
impl ::core::clone::Clone for TokenKind {
    #[inline]
    fn clone(&self) -> TokenKind {
        let _: ::core::clone::AssertParamIsClone<Option<DocStyle>>;
        let _: ::core::clone::AssertParamIsClone<Option<DocStyle>>;
        let _: ::core::clone::AssertParamIsClone<bool>;
        let _: ::core::clone::AssertParamIsClone<LiteralKind>;
        let _: ::core::clone::AssertParamIsClone<u32>;
        *self
    }
}Clone, #[automatically_derived]
impl ::core::marker::Copy for TokenKind { }Copy, #[automatically_derived]
impl ::core::fmt::Debug for TokenKind {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        match self {
            TokenKind::LineComment { doc_style: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f,
                    "LineComment", "doc_style", &__self_0),
            TokenKind::BlockComment {
                doc_style: __self_0, terminated: __self_1 } =>
                ::core::fmt::Formatter::debug_struct_field2_finish(f,
                    "BlockComment", "doc_style", __self_0, "terminated",
                    &__self_1),
            TokenKind::Whitespace =>
                ::core::fmt::Formatter::write_str(f, "Whitespace"),
            TokenKind::Frontmatter {
                has_invalid_preceding_whitespace: __self_0,
                invalid_infostring: __self_1 } =>
                ::core::fmt::Formatter::debug_struct_field2_finish(f,
                    "Frontmatter", "has_invalid_preceding_whitespace", __self_0,
                    "invalid_infostring", &__self_1),
            TokenKind::Ident => ::core::fmt::Formatter::write_str(f, "Ident"),
            TokenKind::InvalidIdent =>
                ::core::fmt::Formatter::write_str(f, "InvalidIdent"),
            TokenKind::RawIdent =>
                ::core::fmt::Formatter::write_str(f, "RawIdent"),
            TokenKind::UnknownPrefix =>
                ::core::fmt::Formatter::write_str(f, "UnknownPrefix"),
            TokenKind::UnknownPrefixLifetime =>
                ::core::fmt::Formatter::write_str(f, "UnknownPrefixLifetime"),
            TokenKind::RawLifetime =>
                ::core::fmt::Formatter::write_str(f, "RawLifetime"),
            TokenKind::GuardedStrPrefix =>
                ::core::fmt::Formatter::write_str(f, "GuardedStrPrefix"),
            TokenKind::Literal { kind: __self_0, suffix_start: __self_1 } =>
                ::core::fmt::Formatter::debug_struct_field2_finish(f,
                    "Literal", "kind", __self_0, "suffix_start", &__self_1),
            TokenKind::Lifetime { starts_with_number: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f,
                    "Lifetime", "starts_with_number", &__self_0),
            TokenKind::Semi => ::core::fmt::Formatter::write_str(f, "Semi"),
            TokenKind::Comma => ::core::fmt::Formatter::write_str(f, "Comma"),
            TokenKind::Dot => ::core::fmt::Formatter::write_str(f, "Dot"),
            TokenKind::OpenParen =>
                ::core::fmt::Formatter::write_str(f, "OpenParen"),
            TokenKind::CloseParen =>
                ::core::fmt::Formatter::write_str(f, "CloseParen"),
            TokenKind::OpenBrace =>
                ::core::fmt::Formatter::write_str(f, "OpenBrace"),
            TokenKind::CloseBrace =>
                ::core::fmt::Formatter::write_str(f, "CloseBrace"),
            TokenKind::OpenBracket =>
                ::core::fmt::Formatter::write_str(f, "OpenBracket"),
            TokenKind::CloseBracket =>
                ::core::fmt::Formatter::write_str(f, "CloseBracket"),
            TokenKind::At => ::core::fmt::Formatter::write_str(f, "At"),
            TokenKind::Pound => ::core::fmt::Formatter::write_str(f, "Pound"),
            TokenKind::Tilde => ::core::fmt::Formatter::write_str(f, "Tilde"),
            TokenKind::Question =>
                ::core::fmt::Formatter::write_str(f, "Question"),
            TokenKind::Colon => ::core::fmt::Formatter::write_str(f, "Colon"),
            TokenKind::Dollar =>
                ::core::fmt::Formatter::write_str(f, "Dollar"),
            TokenKind::Eq => ::core::fmt::Formatter::write_str(f, "Eq"),
            TokenKind::Bang => ::core::fmt::Formatter::write_str(f, "Bang"),
            TokenKind::Lt => ::core::fmt::Formatter::write_str(f, "Lt"),
            TokenKind::Gt => ::core::fmt::Formatter::write_str(f, "Gt"),
            TokenKind::Minus => ::core::fmt::Formatter::write_str(f, "Minus"),
            TokenKind::And => ::core::fmt::Formatter::write_str(f, "And"),
            TokenKind::Or => ::core::fmt::Formatter::write_str(f, "Or"),
            TokenKind::Plus => ::core::fmt::Formatter::write_str(f, "Plus"),
            TokenKind::Star => ::core::fmt::Formatter::write_str(f, "Star"),
            TokenKind::Slash => ::core::fmt::Formatter::write_str(f, "Slash"),
            TokenKind::Caret => ::core::fmt::Formatter::write_str(f, "Caret"),
            TokenKind::Percent =>
                ::core::fmt::Formatter::write_str(f, "Percent"),
            TokenKind::Unknown =>
                ::core::fmt::Formatter::write_str(f, "Unknown"),
            TokenKind::Eof => ::core::fmt::Formatter::write_str(f, "Eof"),
        }
    }
}Debug, #[automatically_derived]
impl ::core::cmp::PartialEq for TokenKind {
    #[inline]
    fn eq(&self, other: &TokenKind) -> bool {
        let __self_discr = ::core::intrinsics::discriminant_value(self);
        let __arg1_discr = ::core::intrinsics::discriminant_value(other);
        __self_discr == __arg1_discr &&
            match (self, other) {
                (TokenKind::LineComment { doc_style: __self_0 },
                    TokenKind::LineComment { doc_style: __arg1_0 }) =>
                    __self_0 == __arg1_0,
                (TokenKind::BlockComment {
                    doc_style: __self_0, terminated: __self_1 },
                    TokenKind::BlockComment {
                    doc_style: __arg1_0, terminated: __arg1_1 }) =>
                    __self_1 == __arg1_1 && __self_0 == __arg1_0,
                (TokenKind::Frontmatter {
                    has_invalid_preceding_whitespace: __self_0,
                    invalid_infostring: __self_1 }, TokenKind::Frontmatter {
                    has_invalid_preceding_whitespace: __arg1_0,
                    invalid_infostring: __arg1_1 }) =>
                    __self_0 == __arg1_0 && __self_1 == __arg1_1,
                (TokenKind::Literal { kind: __self_0, suffix_start: __self_1
                    }, TokenKind::Literal {
                    kind: __arg1_0, suffix_start: __arg1_1 }) =>
                    __self_1 == __arg1_1 && __self_0 == __arg1_0,
                (TokenKind::Lifetime { starts_with_number: __self_0 },
                    TokenKind::Lifetime { starts_with_number: __arg1_0 }) =>
                    __self_0 == __arg1_0,
                _ => true,
            }
    }
}PartialEq, #[automatically_derived]
impl ::core::cmp::Eq for TokenKind {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_receiver_is_total_eq(&self) {
        let _: ::core::cmp::AssertParamIsEq<Option<DocStyle>>;
        let _: ::core::cmp::AssertParamIsEq<Option<DocStyle>>;
        let _: ::core::cmp::AssertParamIsEq<bool>;
        let _: ::core::cmp::AssertParamIsEq<LiteralKind>;
        let _: ::core::cmp::AssertParamIsEq<u32>;
    }
}Eq)]
72pub enum TokenKind {
73    /// A line comment, e.g. `// comment`.
74    LineComment {
75        doc_style: Option<DocStyle>,
76    },
77
78    /// A block comment, e.g. `/* block comment */`.
79    ///
80    /// Block comments can be recursive, so a sequence like `/* /* */`
81    /// will not be considered terminated and will result in a parsing error.
82    BlockComment {
83        doc_style: Option<DocStyle>,
84        terminated: bool,
85    },
86
87    /// Any whitespace character sequence.
88    Whitespace,
89
90    Frontmatter {
91        has_invalid_preceding_whitespace: bool,
92        invalid_infostring: bool,
93    },
94
95    /// An identifier or keyword, e.g. `ident` or `continue`.
96    Ident,
97
98    /// An identifier that is invalid because it contains emoji.
99    InvalidIdent,
100
101    /// A raw identifier, e.g. "r#ident".
102    RawIdent,
103
104    /// An unknown literal prefix, like `foo#`, `foo'`, `foo"`. Excludes
105    /// literal prefixes that contain emoji, which are considered "invalid".
106    ///
107    /// Note that only the
108    /// prefix (`foo`) is included in the token, not the separator (which is
109    /// lexed as its own distinct token). In Rust 2021 and later, reserved
110    /// prefixes are reported as errors; in earlier editions, they result in a
111    /// (allowed by default) lint, and are treated as regular identifier
112    /// tokens.
113    UnknownPrefix,
114
115    /// An unknown prefix in a lifetime, like `'foo#`.
116    ///
117    /// Like `UnknownPrefix`, only the `'` and prefix are included in the token
118    /// and not the separator.
119    UnknownPrefixLifetime,
120
121    /// A raw lifetime, e.g. `'r#foo`. In edition < 2021 it will be split into
122    /// several tokens: `'r` and `#` and `foo`.
123    RawLifetime,
124
125    /// Guarded string literal prefix: `#"` or `##`.
126    ///
127    /// Used for reserving "guarded strings" (RFC 3598) in edition 2024.
128    /// Split into the component tokens on older editions.
129    GuardedStrPrefix,
130
131    /// Literals, e.g. `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
132    /// suffix, but may be present here on string and float literals. Users of
133    /// this type will need to check for and reject that case.
134    ///
135    /// See [LiteralKind] for more details.
136    Literal {
137        kind: LiteralKind,
138        suffix_start: u32,
139    },
140
141    /// A lifetime, e.g. `'a`.
142    Lifetime {
143        starts_with_number: bool,
144    },
145
146    /// `;`
147    Semi,
148    /// `,`
149    Comma,
150    /// `.`
151    Dot,
152    /// `(`
153    OpenParen,
154    /// `)`
155    CloseParen,
156    /// `{`
157    OpenBrace,
158    /// `}`
159    CloseBrace,
160    /// `[`
161    OpenBracket,
162    /// `]`
163    CloseBracket,
164    /// `@`
165    At,
166    /// `#`
167    Pound,
168    /// `~`
169    Tilde,
170    /// `?`
171    Question,
172    /// `:`
173    Colon,
174    /// `$`
175    Dollar,
176    /// `=`
177    Eq,
178    /// `!`
179    Bang,
180    /// `<`
181    Lt,
182    /// `>`
183    Gt,
184    /// `-`
185    Minus,
186    /// `&`
187    And,
188    /// `|`
189    Or,
190    /// `+`
191    Plus,
192    /// `*`
193    Star,
194    /// `/`
195    Slash,
196    /// `^`
197    Caret,
198    /// `%`
199    Percent,
200
201    /// Unknown token, not expected by the lexer, e.g. "№"
202    Unknown,
203
204    /// End of input.
205    Eof,
206}
207
/// Style of a documentation comment, carried by the comment variants of
/// [`TokenKind`] in their `doc_style` field.
// NOTE(review): presumably `Outer` is `///` / `/** */` and `Inner` is
// `//!` / `/*! */`; confirm against the comment-lexing code, which is
// outside this view.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DocStyle {
    Outer,
    Inner,
}

214/// Enum representing the literal types supported by the lexer.
215///
216/// Note that the suffix is *not* considered when deciding the `LiteralKind` in
217/// this type. This means that float literals like `1f32` are classified by this
218/// type as `Int`. (Compare against `rustc_ast::token::LitKind` and
219/// `rustc_ast::ast::LitKind`).
220#[derive(#[automatically_derived]
impl ::core::clone::Clone for LiteralKind {
    #[inline]
    fn clone(&self) -> LiteralKind {
        let _: ::core::clone::AssertParamIsClone<Base>;
        let _: ::core::clone::AssertParamIsClone<bool>;
        let _: ::core::clone::AssertParamIsClone<Option<u8>>;
        let _: ::core::clone::AssertParamIsClone<Option<u8>>;
        let _: ::core::clone::AssertParamIsClone<Option<u8>>;
        *self
    }
}Clone, #[automatically_derived]
impl ::core::marker::Copy for LiteralKind { }Copy, #[automatically_derived]
impl ::core::fmt::Debug for LiteralKind {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
        match self {
            LiteralKind::Int { base: __self_0, empty_int: __self_1 } =>
                ::core::fmt::Formatter::debug_struct_field2_finish(f, "Int",
                    "base", __self_0, "empty_int", &__self_1),
            LiteralKind::Float { base: __self_0, empty_exponent: __self_1 } =>
                ::core::fmt::Formatter::debug_struct_field2_finish(f, "Float",
                    "base", __self_0, "empty_exponent", &__self_1),
            LiteralKind::Char { terminated: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f, "Char",
                    "terminated", &__self_0),
            LiteralKind::Byte { terminated: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f, "Byte",
                    "terminated", &__self_0),
            LiteralKind::Str { terminated: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f, "Str",
                    "terminated", &__self_0),
            LiteralKind::ByteStr { terminated: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f,
                    "ByteStr", "terminated", &__self_0),
            LiteralKind::CStr { terminated: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f, "CStr",
                    "terminated", &__self_0),
            LiteralKind::RawStr { n_hashes: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f,
                    "RawStr", "n_hashes", &__self_0),
            LiteralKind::RawByteStr { n_hashes: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f,
                    "RawByteStr", "n_hashes", &__self_0),
            LiteralKind::RawCStr { n_hashes: __self_0 } =>
                ::core::fmt::Formatter::debug_struct_field1_finish(f,
                    "RawCStr", "n_hashes", &__self_0),
        }
    }
}Debug, #[automatically_derived]
impl ::core::cmp::PartialEq for LiteralKind {
    #[inline]
    fn eq(&self, other: &LiteralKind) -> bool {
        let __self_discr = ::core::intrinsics::discriminant_value(self);
        let __arg1_discr = ::core::intrinsics::discriminant_value(other);
        __self_discr == __arg1_discr &&
            match (self, other) {
                (LiteralKind::Int { base: __self_0, empty_int: __self_1 },
                    LiteralKind::Int { base: __arg1_0, empty_int: __arg1_1 }) =>
                    __self_1 == __arg1_1 && __self_0 == __arg1_0,
                (LiteralKind::Float { base: __self_0, empty_exponent: __self_1
                    }, LiteralKind::Float {
                    base: __arg1_0, empty_exponent: __arg1_1 }) =>
                    __self_1 == __arg1_1 && __self_0 == __arg1_0,
                (LiteralKind::Char { terminated: __self_0 },
                    LiteralKind::Char { terminated: __arg1_0 }) =>
                    __self_0 == __arg1_0,
                (LiteralKind::Byte { terminated: __self_0 },
                    LiteralKind::Byte { terminated: __arg1_0 }) =>
                    __self_0 == __arg1_0,
                (LiteralKind::Str { terminated: __self_0 }, LiteralKind::Str {
                    terminated: __arg1_0 }) => __self_0 == __arg1_0,
                (LiteralKind::ByteStr { terminated: __self_0 },
                    LiteralKind::ByteStr { terminated: __arg1_0 }) =>
                    __self_0 == __arg1_0,
                (LiteralKind::CStr { terminated: __self_0 },
                    LiteralKind::CStr { terminated: __arg1_0 }) =>
                    __self_0 == __arg1_0,
                (LiteralKind::RawStr { n_hashes: __self_0 },
                    LiteralKind::RawStr { n_hashes: __arg1_0 }) =>
                    __self_0 == __arg1_0,
                (LiteralKind::RawByteStr { n_hashes: __self_0 },
                    LiteralKind::RawByteStr { n_hashes: __arg1_0 }) =>
                    __self_0 == __arg1_0,
                (LiteralKind::RawCStr { n_hashes: __self_0 },
                    LiteralKind::RawCStr { n_hashes: __arg1_0 }) =>
                    __self_0 == __arg1_0,
                _ => unsafe { ::core::intrinsics::unreachable() }
            }
    }
}PartialEq, #[automatically_derived]
impl ::core::cmp::Eq for LiteralKind {
    #[inline]
    #[doc(hidden)]
    #[coverage(off)]
    fn assert_receiver_is_total_eq(&self) {
        let _: ::core::cmp::AssertParamIsEq<Base>;
        let _: ::core::cmp::AssertParamIsEq<bool>;
        let _: ::core::cmp::AssertParamIsEq<Option<u8>>;
        let _: ::core::cmp::AssertParamIsEq<Option<u8>>;
        let _: ::core::cmp::AssertParamIsEq<Option<u8>>;
    }
}Eq, #[automatically_derived]
impl ::core::cmp::PartialOrd for LiteralKind {
    #[inline]
    fn partial_cmp(&self, other: &LiteralKind)
        -> ::core::option::Option<::core::cmp::Ordering> {
        let __self_discr = ::core::intrinsics::discriminant_value(self);
        let __arg1_discr = ::core::intrinsics::discriminant_value(other);
        match (self, other) {
            (LiteralKind::Int { base: __self_0, empty_int: __self_1 },
                LiteralKind::Int { base: __arg1_0, empty_int: __arg1_1 }) =>
                match ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0)
                    {
                    ::core::option::Option::Some(::core::cmp::Ordering::Equal)
                        => ::core::cmp::PartialOrd::partial_cmp(__self_1, __arg1_1),
                    cmp => cmp,
                },
            (LiteralKind::Float { base: __self_0, empty_exponent: __self_1 },
                LiteralKind::Float { base: __arg1_0, empty_exponent: __arg1_1
                }) =>
                match ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0)
                    {
                    ::core::option::Option::Some(::core::cmp::Ordering::Equal)
                        => ::core::cmp::PartialOrd::partial_cmp(__self_1, __arg1_1),
                    cmp => cmp,
                },
            (LiteralKind::Char { terminated: __self_0 }, LiteralKind::Char {
                terminated: __arg1_0 }) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            (LiteralKind::Byte { terminated: __self_0 }, LiteralKind::Byte {
                terminated: __arg1_0 }) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            (LiteralKind::Str { terminated: __self_0 }, LiteralKind::Str {
                terminated: __arg1_0 }) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            (LiteralKind::ByteStr { terminated: __self_0 },
                LiteralKind::ByteStr { terminated: __arg1_0 }) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            (LiteralKind::CStr { terminated: __self_0 }, LiteralKind::CStr {
                terminated: __arg1_0 }) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            (LiteralKind::RawStr { n_hashes: __self_0 }, LiteralKind::RawStr {
                n_hashes: __arg1_0 }) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            (LiteralKind::RawByteStr { n_hashes: __self_0 },
                LiteralKind::RawByteStr { n_hashes: __arg1_0 }) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            (LiteralKind::RawCStr { n_hashes: __self_0 },
                LiteralKind::RawCStr { n_hashes: __arg1_0 }) =>
                ::core::cmp::PartialOrd::partial_cmp(__self_0, __arg1_0),
            _ =>
                ::core::cmp::PartialOrd::partial_cmp(&__self_discr,
                    &__arg1_discr),
        }
    }
}PartialOrd, #[automatically_derived]
impl ::core::cmp::Ord for LiteralKind {
    #[inline]
    fn cmp(&self, other: &LiteralKind) -> ::core::cmp::Ordering {
        let __self_discr = ::core::intrinsics::discriminant_value(self);
        let __arg1_discr = ::core::intrinsics::discriminant_value(other);
        match ::core::cmp::Ord::cmp(&__self_discr, &__arg1_discr) {
            ::core::cmp::Ordering::Equal =>
                match (self, other) {
                    (LiteralKind::Int { base: __self_0, empty_int: __self_1 },
                        LiteralKind::Int { base: __arg1_0, empty_int: __arg1_1 }) =>
                        match ::core::cmp::Ord::cmp(__self_0, __arg1_0) {
                            ::core::cmp::Ordering::Equal =>
                                ::core::cmp::Ord::cmp(__self_1, __arg1_1),
                            cmp => cmp,
                        },
                    (LiteralKind::Float {
                        base: __self_0, empty_exponent: __self_1 },
                        LiteralKind::Float {
                        base: __arg1_0, empty_exponent: __arg1_1 }) =>
                        match ::core::cmp::Ord::cmp(__self_0, __arg1_0) {
                            ::core::cmp::Ordering::Equal =>
                                ::core::cmp::Ord::cmp(__self_1, __arg1_1),
                            cmp => cmp,
                        },
                    (LiteralKind::Char { terminated: __self_0 },
                        LiteralKind::Char { terminated: __arg1_0 }) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    (LiteralKind::Byte { terminated: __self_0 },
                        LiteralKind::Byte { terminated: __arg1_0 }) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    (LiteralKind::Str { terminated: __self_0 },
                        LiteralKind::Str { terminated: __arg1_0 }) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    (LiteralKind::ByteStr { terminated: __self_0 },
                        LiteralKind::ByteStr { terminated: __arg1_0 }) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    (LiteralKind::CStr { terminated: __self_0 },
                        LiteralKind::CStr { terminated: __arg1_0 }) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    (LiteralKind::RawStr { n_hashes: __self_0 },
                        LiteralKind::RawStr { n_hashes: __arg1_0 }) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    (LiteralKind::RawByteStr { n_hashes: __self_0 },
                        LiteralKind::RawByteStr { n_hashes: __arg1_0 }) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    (LiteralKind::RawCStr { n_hashes: __self_0 },
                        LiteralKind::RawCStr { n_hashes: __arg1_0 }) =>
                        ::core::cmp::Ord::cmp(__self_0, __arg1_0),
                    _ => unsafe { ::core::intrinsics::unreachable() }
                },
            cmp => cmp,
        }
    }
}Ord)]
221pub enum LiteralKind {
222    /// `12_u8`, `0o100`, `0b120i99`, `1f32`.
223    Int { base: Base, empty_int: bool },
224    /// `12.34f32`, `1e3`, but not `1f32`.
225    Float { base: Base, empty_exponent: bool },
226    /// `'a'`, `'\\'`, `'''`, `';`
227    Char { terminated: bool },
228    /// `b'a'`, `b'\\'`, `b'''`, `b';`
229    Byte { terminated: bool },
230    /// `"abc"`, `"abc`
231    Str { terminated: bool },
232    /// `b"abc"`, `b"abc`
233    ByteStr { terminated: bool },
234    /// `c"abc"`, `c"abc`
235    CStr { terminated: bool },
236    /// `r"abc"`, `r#"abc"#`, `r####"ab"###"c"####`, `r#"a`. `None` indicates
237    /// an invalid literal.
238    RawStr { n_hashes: Option<u8> },
239    /// `br"abc"`, `br#"abc"#`, `br####"ab"###"c"####`, `br#"a`. `None`
240    /// indicates an invalid literal.
241    RawByteStr { n_hashes: Option<u8> },
242    /// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` indicates an invalid literal.
243    RawCStr { n_hashes: Option<u8> },
244}
245
/// `#"abc"#`, `##"a"` (fewer closing), or even `#"a` (unterminated).
///
/// Can capture fewer closing hashes than starting hashes,
/// for more efficient lexing and better backwards diagnostics.
///
/// The derived ordering compares `n_hashes`, then `terminated`, then
/// `token_len`, following field declaration order.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct GuardedStr {
    // NOTE(review): presumably the number of opening `#` guards; confirm
    // against the guarded-string lexing code, which is outside this view.
    pub n_hashes: u32,
    pub terminated: bool,
    pub token_len: u32,
}

/// Describes why a raw string literal failed validation.
///
/// This is the error type returned by [`validate_raw_str`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum RawStrError {
    /// Non `#` characters exist between `r` and `"`, e.g. `r##~"abcde"##`
    InvalidStarter { bad_char: char },
    /// The string was not terminated, e.g. `r###"abcde"##`.
    /// `possible_terminator_offset` is the number of characters after `r` or
    /// `br` where they may have intended to terminate it.
    NoTerminator { expected: u32, found: u32, possible_terminator_offset: Option<u32> },
    /// More than 255 `#`s exist.
    TooManyDelimiters { found: u32 },
}
268
/// Base of numeric literal encoding according to its prefix.
///
/// Each variant's discriminant is the numeric radix it stands for.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Base {
    /// Literal starts with "0b".
    Binary = 2,
    /// Literal starts with "0o".
    Octal = 8,
    /// Literal doesn't contain a prefix.
    Decimal = 10,
    /// Literal starts with "0x".
    Hexadecimal = 16,
}
281
282/// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun",
283/// but shebang isn't a part of rust syntax.
284pub fn strip_shebang(input: &str) -> Option<usize> {
285    // Shebang must start with `#!` literally, without any preceding whitespace.
286    // For simplicity we consider any line starting with `#!` a shebang,
287    // regardless of restrictions put on shebangs by specific platforms.
288    if let Some(input_tail) = input.strip_prefix("#!") {
289        // Ok, this is a shebang but if the next non-whitespace token is `[`,
290        // then it may be valid Rust code, so consider it Rust code.
291        let next_non_whitespace_token =
292            tokenize(input_tail, FrontmatterAllowed::No).map(|tok| tok.kind).find(|tok| {
293                !#[allow(non_exhaustive_omitted_patterns)] match tok {
    TokenKind::Whitespace | TokenKind::LineComment { doc_style: None } |
        TokenKind::BlockComment { doc_style: None, .. } => true,
    _ => false,
}matches!(
294                    tok,
295                    TokenKind::Whitespace
296                        | TokenKind::LineComment { doc_style: None }
297                        | TokenKind::BlockComment { doc_style: None, .. }
298                )
299            });
300        if next_non_whitespace_token != Some(TokenKind::OpenBracket) {
301            // No other choice than to consider this a shebang.
302            return Some(2 + input_tail.lines().next().unwrap_or_default().len());
303        }
304    }
305    None
306}
307
308/// Validates a raw string literal. Used for getting more information about a
309/// problem with a `RawStr`/`RawByteStr` with a `None` field.
310#[inline]
311pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError> {
312    if true {
    if !!input.is_empty() {
        ::core::panicking::panic("assertion failed: !input.is_empty()")
    };
};debug_assert!(!input.is_empty());
313    let mut cursor = Cursor::new(input, FrontmatterAllowed::No);
314    // Move past the leading `r` or `br`.
315    for _ in 0..prefix_len {
316        cursor.bump().unwrap();
317    }
318    cursor.raw_double_quoted_string(prefix_len).map(|_| ())
319}
320
321/// Creates an iterator that produces tokens from the input string.
322///
323/// When parsing a full Rust document,
324/// first [`strip_shebang`] and then allow frontmatters with [`FrontmatterAllowed::Yes`].
325///
326/// When tokenizing a slice of a document, be sure to disallow frontmatters with [`FrontmatterAllowed::No`]
327pub fn tokenize(
328    input: &str,
329    frontmatter_allowed: FrontmatterAllowed,
330) -> impl Iterator<Item = Token> {
331    let mut cursor = Cursor::new(input, frontmatter_allowed);
332    std::iter::from_fn(move || {
333        let token = cursor.advance_token();
334        if token.kind != TokenKind::Eof { Some(token) } else { None }
335    })
336}
337
/// True if `c` is considered a whitespace according to Rust language definition.
/// See [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html)
/// for definitions of these classes.
pub fn is_whitespace(c: char) -> bool {
    // This is Pattern_White_Space.
    //
    // Note that this set is stable (ie, it doesn't change with different
    // Unicode versions), so it's ok to just hard-code the values.

    matches!(
        c,
        // End-of-line characters
        | '\u{000A}' // line feed (\n)
        | '\u{000B}' // vertical tab
        | '\u{000C}' // form feed
        | '\u{000D}' // carriage return (\r)
        | '\u{0085}' // next line (from latin1)
        | '\u{2028}' // LINE SEPARATOR
        | '\u{2029}' // PARAGRAPH SEPARATOR

        // `Default_Ignorable_Code_Point` characters
        | '\u{200E}' // LEFT-TO-RIGHT MARK
        | '\u{200F}' // RIGHT-TO-LEFT MARK

        // Horizontal space characters
        | '\u{0009}'   // tab (\t)
        | '\u{0020}' // space
    )
}
367
/// True if `c` is considered horizontal whitespace according to Rust language definition.
pub fn is_horizontal_whitespace(c: char) -> bool {
    // This is only the horizontal-space part of Pattern_White_Space (tab and
    // space), not the whole property; see `is_whitespace` for the full set.
    //
    // Note that this set is stable (ie, it doesn't change with different
    // Unicode versions), so it's ok to just hard-code the values.

    matches!(
        c,
        // Horizontal space characters
        '\u{0009}'   // tab (\t)
        | '\u{0020}' // space
    )
}
382
383/// True if `c` is valid as a first character of an identifier.
384/// See [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html) for
385/// a formal definition of valid identifier name.
386pub fn is_id_start(c: char) -> bool {
387    // This is XID_Start OR '_' (which formally is not a XID_Start).
388    c == '_' || unicode_ident::is_xid_start(c)
389}
390
391/// True if `c` is valid as a non-first character of an identifier.
392/// See [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html) for
393/// a formal definition of valid identifier name.
394pub fn is_id_continue(c: char) -> bool {
395    unicode_ident::is_xid_continue(c)
396}
397
398/// The passed string is lexically an identifier.
399pub fn is_ident(string: &str) -> bool {
400    let mut chars = string.chars();
401    if let Some(start) = chars.next() {
402        is_id_start(start) && chars.all(is_id_continue)
403    } else {
404        false
405    }
406}
407
/// Whether the lexer is permitted to recognize a frontmatter block.
///
/// A [`Cursor`] created with `Yes` switches itself to `No` once it has
/// produced its first non-whitespace token.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FrontmatterAllowed {
    /// A frontmatter may still be lexed.
    Yes,
    /// Frontmatter syntax is not recognized.
    No,
}
412
413/// Peekable iterator over a char sequence.
414///
415/// Next characters can be peeked via `first` method,
416/// and position can be shifted forward via `bump` method.
417pub struct Cursor<'a> {
418    len_remaining: usize,
419    /// Iterator over chars. Slightly faster than a &str.
420    chars: Chars<'a>,
421    pub(crate) frontmatter_allowed: FrontmatterAllowed,
422    #[cfg(debug_assertions)]
423    prev: char,
424}
425
/// Sentinel character returned by the peeking methods when no character is
/// available at the requested position.
const EOF_CHAR: char = '\0';
427
428impl<'a> Cursor<'a> {
429    pub fn new(input: &'a str, frontmatter_allowed: FrontmatterAllowed) -> Cursor<'a> {
430        Cursor {
431            len_remaining: input.len(),
432            chars: input.chars(),
433            frontmatter_allowed,
434            #[cfg(debug_assertions)]
435            prev: EOF_CHAR,
436        }
437    }
438
439    pub fn as_str(&self) -> &'a str {
440        self.chars.as_str()
441    }
442
443    /// Returns the last eaten symbol (or `'\0'` in release builds).
444    /// (For debug assertions only.)
445    pub(crate) fn prev(&self) -> char {
446        #[cfg(debug_assertions)]
447        {
448            self.prev
449        }
450
451        #[cfg(not(debug_assertions))]
452        {
453            EOF_CHAR
454        }
455    }
456
457    /// Peeks the next symbol from the input stream without consuming it.
458    /// If requested position doesn't exist, `EOF_CHAR` is returned.
459    /// However, getting `EOF_CHAR` doesn't always mean actual end of file,
460    /// it should be checked with `is_eof` method.
461    pub fn first(&self) -> char {
462        // `.next()` optimizes better than `.nth(0)`
463        self.chars.clone().next().unwrap_or(EOF_CHAR)
464    }
465
466    /// Peeks the second symbol from the input stream without consuming it.
467    pub(crate) fn second(&self) -> char {
468        // `.next()` optimizes better than `.nth(1)`
469        let mut iter = self.chars.clone();
470        iter.next();
471        iter.next().unwrap_or(EOF_CHAR)
472    }
473
474    /// Peeks the third symbol from the input stream without consuming it.
475    pub fn third(&self) -> char {
476        // `.next()` optimizes better than `.nth(2)`
477        let mut iter = self.chars.clone();
478        iter.next();
479        iter.next();
480        iter.next().unwrap_or(EOF_CHAR)
481    }
482
483    /// Checks if there is nothing more to consume.
484    pub(crate) fn is_eof(&self) -> bool {
485        self.chars.as_str().is_empty()
486    }
487
488    /// Returns amount of already consumed symbols.
489    pub(crate) fn pos_within_token(&self) -> u32 {
490        (self.len_remaining - self.chars.as_str().len()) as u32
491    }
492
493    /// Resets the number of bytes consumed to 0.
494    pub(crate) fn reset_pos_within_token(&mut self) {
495        self.len_remaining = self.chars.as_str().len();
496    }
497
498    /// Moves to the next character.
499    pub(crate) fn bump(&mut self) -> Option<char> {
500        let c = self.chars.next()?;
501
502        #[cfg(debug_assertions)]
503        {
504            self.prev = c;
505        }
506
507        Some(c)
508    }
509
510    /// Moves to a substring by a number of bytes.
511    pub(crate) fn bump_bytes(&mut self, n: usize) {
512        self.chars = self.as_str()[n..].chars();
513    }
514
515    /// Eats symbols while predicate returns true or until the end of file is reached.
516    pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
517        // It was tried making optimized version of this for eg. line comments, but
518        // LLVM can inline all of this and compile it down to fast iteration over bytes.
519        while predicate(self.first()) && !self.is_eof() {
520            self.bump();
521        }
522    }
523
524    pub(crate) fn eat_until(&mut self, byte: u8) {
525        self.chars = match memchr::memchr(byte, self.as_str().as_bytes()) {
526            Some(index) => self.as_str()[index..].chars(),
527            None => "".chars(),
528        }
529    }
530
531    /// Parses a token from the input string.
532    pub fn advance_token(&mut self) -> Token {
533        let Some(first_char) = self.bump() else {
534            return Token::new(TokenKind::Eof, 0);
535        };
536
537        let token_kind = match first_char {
538            c if #[allow(non_exhaustive_omitted_patterns)] match self.frontmatter_allowed {
    FrontmatterAllowed::Yes => true,
    _ => false,
}matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
539                && is_whitespace(c) =>
540            {
541                let mut last = first_char;
542                while is_whitespace(self.first()) {
543                    let Some(c) = self.bump() else {
544                        break;
545                    };
546                    last = c;
547                }
548                // invalid frontmatter opening as whitespace preceding it isn't newline.
549                // combine the whitespace and the frontmatter to a single token as we shall
550                // error later.
551                if last != '\n' && self.as_str().starts_with("---") {
552                    self.bump();
553                    self.frontmatter(true)
554                } else {
555                    Whitespace
556                }
557            }
558            '-' if #[allow(non_exhaustive_omitted_patterns)] match self.frontmatter_allowed {
    FrontmatterAllowed::Yes => true,
    _ => false,
}matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
559                && self.as_str().starts_with("--") =>
560            {
561                // happy path
562                self.frontmatter(false)
563            }
564            // Slash, comment or block comment.
565            '/' => match self.first() {
566                '/' => self.line_comment(),
567                '*' => self.block_comment(),
568                _ => Slash,
569            },
570
571            // Whitespace sequence.
572            c if is_whitespace(c) => self.whitespace(),
573
574            // Raw identifier, raw string literal or identifier.
575            'r' => match (self.first(), self.second()) {
576                ('#', c1) if is_id_start(c1) => self.raw_ident(),
577                ('#', _) | ('"', _) => {
578                    let res = self.raw_double_quoted_string(1);
579                    let suffix_start = self.pos_within_token();
580                    if res.is_ok() {
581                        self.eat_literal_suffix();
582                    }
583                    let kind = RawStr { n_hashes: res.ok() };
584                    Literal { kind, suffix_start }
585                }
586                _ => self.ident_or_unknown_prefix(),
587            },
588
589            // Byte literal, byte string literal, raw byte string literal or identifier.
590            'b' => self.c_or_byte_string(
591                |terminated| ByteStr { terminated },
592                |n_hashes| RawByteStr { n_hashes },
593                Some(|terminated| Byte { terminated }),
594            ),
595
596            // c-string literal, raw c-string literal or identifier.
597            'c' => self.c_or_byte_string(
598                |terminated| CStr { terminated },
599                |n_hashes| RawCStr { n_hashes },
600                None,
601            ),
602
603            // Identifier (this should be checked after other variant that can
604            // start as identifier).
605            c if is_id_start(c) => self.ident_or_unknown_prefix(),
606
607            // Numeric literal.
608            c @ '0'..='9' => {
609                let literal_kind = self.number(c);
610                let suffix_start = self.pos_within_token();
611                self.eat_literal_suffix();
612                TokenKind::Literal { kind: literal_kind, suffix_start }
613            }
614
615            // Guarded string literal prefix: `#"` or `##`
616            '#' if #[allow(non_exhaustive_omitted_patterns)] match self.first() {
    '"' | '#' => true,
    _ => false,
}matches!(self.first(), '"' | '#') => {
617                self.bump();
618                TokenKind::GuardedStrPrefix
619            }
620
621            // One-symbol tokens.
622            ';' => Semi,
623            ',' => Comma,
624            '.' => Dot,
625            '(' => OpenParen,
626            ')' => CloseParen,
627            '{' => OpenBrace,
628            '}' => CloseBrace,
629            '[' => OpenBracket,
630            ']' => CloseBracket,
631            '@' => At,
632            '#' => Pound,
633            '~' => Tilde,
634            '?' => Question,
635            ':' => Colon,
636            '$' => Dollar,
637            '=' => Eq,
638            '!' => Bang,
639            '<' => Lt,
640            '>' => Gt,
641            '-' => Minus,
642            '&' => And,
643            '|' => Or,
644            '+' => Plus,
645            '*' => Star,
646            '^' => Caret,
647            '%' => Percent,
648
649            // Lifetime or character literal.
650            '\'' => self.lifetime_or_char(),
651
652            // String literal.
653            '"' => {
654                let terminated = self.double_quoted_string();
655                let suffix_start = self.pos_within_token();
656                if terminated {
657                    self.eat_literal_suffix();
658                }
659                let kind = Str { terminated };
660                Literal { kind, suffix_start }
661            }
662            // Identifier starting with an emoji. Only lexed for graceful error recovery.
663            c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
664            _ => Unknown,
665        };
666        if #[allow(non_exhaustive_omitted_patterns)] match self.frontmatter_allowed {
    FrontmatterAllowed::Yes => true,
    _ => false,
}matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
667            && !#[allow(non_exhaustive_omitted_patterns)] match token_kind {
    Whitespace => true,
    _ => false,
}matches!(token_kind, Whitespace)
668        {
669            // stop allowing frontmatters after first non-whitespace token
670            self.frontmatter_allowed = FrontmatterAllowed::No;
671        }
672        let res = Token::new(token_kind, self.pos_within_token());
673        self.reset_pos_within_token();
674        res
675    }
676
677    /// Given that one `-` was eaten, eat the rest of the frontmatter.
678    fn frontmatter(&mut self, has_invalid_preceding_whitespace: bool) -> TokenKind {
679        if true {
    match (&'-', &self.prev()) {
        (left_val, right_val) => {
            if !(*left_val == *right_val) {
                let kind = ::core::panicking::AssertKind::Eq;
                ::core::panicking::assert_failed(kind, &*left_val,
                    &*right_val, ::core::option::Option::None);
            }
        }
    };
};debug_assert_eq!('-', self.prev());
680
681        let pos = self.pos_within_token();
682        self.eat_while(|c| c == '-');
683
684        // one `-` is eaten by the caller.
685        let length_opening = self.pos_within_token() - pos + 1;
686
687        // must be ensured by the caller
688        if true {
    if !(length_opening >= 3) {
        ::core::panicking::panic("assertion failed: length_opening >= 3")
    };
};debug_assert!(length_opening >= 3);
689
690        // whitespace between the opening and the infostring.
691        self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch));
692
693        // copied from `eat_identifier`, but allows `-` and `.` in infostring to allow something like
694        // `---Cargo.toml` as a valid opener
695        if is_id_start(self.first()) {
696            self.bump();
697            self.eat_while(|c| is_id_continue(c) || c == '-' || c == '.');
698        }
699
700        self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch));
701        let invalid_infostring = self.first() != '\n';
702
703        let mut found = false;
704        let nl_fence_pattern = ::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("\n{0:-<1$}", "",
                length_opening as usize))
    })format!("\n{:-<1$}", "", length_opening as usize);
705        if let Some(closing) = self.as_str().find(&nl_fence_pattern) {
706            // candidate found
707            self.bump_bytes(closing + nl_fence_pattern.len());
708            // in case like
709            // ---cargo
710            // --- blahblah
711            // or
712            // ---cargo
713            // ----
714            // combine those stuff into this frontmatter token such that it gets detected later.
715            self.eat_until(b'\n');
716            found = true;
717        }
718
719        if !found {
720            // recovery strategy: a closing statement might have preceding whitespace/newline
721            // but not have enough dashes to properly close. In this case, we eat until there,
722            // and report a mismatch in the parser.
723            let mut rest = self.as_str();
724            // We can look for a shorter closing (starting with four dashes but closing with three)
725            // and other indications that Rust has started and the infostring has ended.
726            let mut potential_closing = rest
727                .find("\n---")
728                // n.b. only in the case where there are dashes, we move the index to the line where
729                // the dashes start as we eat to include that line. For other cases those are Rust code
730                // and not included in the frontmatter.
731                .map(|x| x + 1)
732                .or_else(|| rest.find("\nuse "))
733                .or_else(|| rest.find("\n//!"))
734                .or_else(|| rest.find("\n#!["));
735
736            if potential_closing.is_none() {
737                // a less fortunate recovery if all else fails which finds any dashes preceded by whitespace
738                // on a standalone line. Might be wrong.
739                let mut base_index = 0;
740                while let Some(closing) = rest.find("---") {
741                    let preceding_chars_start = rest[..closing].rfind("\n").map_or(0, |i| i + 1);
742                    if rest[preceding_chars_start..closing].chars().all(is_horizontal_whitespace) {
743                        // candidate found
744                        potential_closing = Some(closing + base_index);
745                        break;
746                    } else {
747                        rest = &rest[closing + 3..];
748                        base_index += closing + 3;
749                    }
750                }
751            }
752
753            if let Some(potential_closing) = potential_closing {
754                // bump to the potential closing, and eat everything on that line.
755                self.bump_bytes(potential_closing);
756                self.eat_until(b'\n');
757            } else {
758                // eat everything. this will get reported as an unclosed frontmatter.
759                self.eat_while(|_| true);
760            }
761        }
762
763        Frontmatter { has_invalid_preceding_whitespace, invalid_infostring }
764    }
765
766    fn line_comment(&mut self) -> TokenKind {
767        if true {
    if !(self.prev() == '/' && self.first() == '/') {
        ::core::panicking::panic("assertion failed: self.prev() == \'/\' && self.first() == \'/\'")
    };
};debug_assert!(self.prev() == '/' && self.first() == '/');
768        self.bump();
769
770        let doc_style = match self.first() {
771            // `//!` is an inner line doc comment.
772            '!' => Some(DocStyle::Inner),
773            // `////` (more than 3 slashes) is not considered a doc comment.
774            '/' if self.second() != '/' => Some(DocStyle::Outer),
775            _ => None,
776        };
777
778        self.eat_until(b'\n');
779        LineComment { doc_style }
780    }
781
782    fn block_comment(&mut self) -> TokenKind {
783        if true {
    if !(self.prev() == '/' && self.first() == '*') {
        ::core::panicking::panic("assertion failed: self.prev() == \'/\' && self.first() == \'*\'")
    };
};debug_assert!(self.prev() == '/' && self.first() == '*');
784        self.bump();
785
786        let doc_style = match self.first() {
787            // `/*!` is an inner block doc comment.
788            '!' => Some(DocStyle::Inner),
789            // `/***` (more than 2 stars) is not considered a doc comment.
790            // `/**/` is not considered a doc comment.
791            '*' if !#[allow(non_exhaustive_omitted_patterns)] match self.second() {
    '*' | '/' => true,
    _ => false,
}matches!(self.second(), '*' | '/') => Some(DocStyle::Outer),
792            _ => None,
793        };
794
795        let mut depth = 1usize;
796        while let Some(c) = self.bump() {
797            match c {
798                '/' if self.first() == '*' => {
799                    self.bump();
800                    depth += 1;
801                }
802                '*' if self.first() == '/' => {
803                    self.bump();
804                    depth -= 1;
805                    if depth == 0 {
806                        // This block comment is closed, so for a construction like "/* */ */"
807                        // there will be a successfully parsed block comment "/* */"
808                        // and " */" will be processed separately.
809                        break;
810                    }
811                }
812                _ => (),
813            }
814        }
815
816        BlockComment { doc_style, terminated: depth == 0 }
817    }
818
819    fn whitespace(&mut self) -> TokenKind {
820        if true {
    if !is_whitespace(self.prev()) {
        ::core::panicking::panic("assertion failed: is_whitespace(self.prev())")
    };
};debug_assert!(is_whitespace(self.prev()));
821        self.eat_while(is_whitespace);
822        Whitespace
823    }
824
825    fn raw_ident(&mut self) -> TokenKind {
826        if true {
    if !(self.prev() == 'r' && self.first() == '#' &&
                is_id_start(self.second())) {
        ::core::panicking::panic("assertion failed: self.prev() == \'r\' && self.first() == \'#\' && is_id_start(self.second())")
    };
};debug_assert!(self.prev() == 'r' && self.first() == '#' && is_id_start(self.second()));
827        // Eat "#" symbol.
828        self.bump();
829        // Eat the identifier part of RawIdent.
830        self.eat_identifier();
831        RawIdent
832    }
833
834    fn ident_or_unknown_prefix(&mut self) -> TokenKind {
835        if true {
    if !is_id_start(self.prev()) {
        ::core::panicking::panic("assertion failed: is_id_start(self.prev())")
    };
};debug_assert!(is_id_start(self.prev()));
836        // Start is already eaten, eat the rest of identifier.
837        self.eat_while(is_id_continue);
838        // Known prefixes must have been handled earlier. So if
839        // we see a prefix here, it is definitely an unknown prefix.
840        match self.first() {
841            '#' | '"' | '\'' => UnknownPrefix,
842            c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
843            _ => Ident,
844        }
845    }
846
847    fn invalid_ident(&mut self) -> TokenKind {
848        // Start is already eaten, eat the rest of identifier.
849        self.eat_while(|c| {
850            const ZERO_WIDTH_JOINER: char = '\u{200d}';
851            is_id_continue(c) || (!c.is_ascii() && c.is_emoji_char()) || c == ZERO_WIDTH_JOINER
852        });
853        // An invalid identifier followed by '#' or '"' or '\'' could be
854        // interpreted as an invalid literal prefix. We don't bother doing that
855        // because the treatment of invalid identifiers and invalid prefixes
856        // would be the same.
857        InvalidIdent
858    }
859
860    fn c_or_byte_string(
861        &mut self,
862        mk_kind: fn(bool) -> LiteralKind,
863        mk_kind_raw: fn(Option<u8>) -> LiteralKind,
864        single_quoted: Option<fn(bool) -> LiteralKind>,
865    ) -> TokenKind {
866        match (self.first(), self.second(), single_quoted) {
867            ('\'', _, Some(single_quoted)) => {
868                self.bump();
869                let terminated = self.single_quoted_string();
870                let suffix_start = self.pos_within_token();
871                if terminated {
872                    self.eat_literal_suffix();
873                }
874                let kind = single_quoted(terminated);
875                Literal { kind, suffix_start }
876            }
877            ('"', _, _) => {
878                self.bump();
879                let terminated = self.double_quoted_string();
880                let suffix_start = self.pos_within_token();
881                if terminated {
882                    self.eat_literal_suffix();
883                }
884                let kind = mk_kind(terminated);
885                Literal { kind, suffix_start }
886            }
887            ('r', '"', _) | ('r', '#', _) => {
888                self.bump();
889                let res = self.raw_double_quoted_string(2);
890                let suffix_start = self.pos_within_token();
891                if res.is_ok() {
892                    self.eat_literal_suffix();
893                }
894                let kind = mk_kind_raw(res.ok());
895                Literal { kind, suffix_start }
896            }
897            _ => self.ident_or_unknown_prefix(),
898        }
899    }
900
901    fn number(&mut self, first_digit: char) -> LiteralKind {
902        if true {
    if !('0' <= self.prev() && self.prev() <= '9') {
        ::core::panicking::panic("assertion failed: \'0\' <= self.prev() && self.prev() <= \'9\'")
    };
};debug_assert!('0' <= self.prev() && self.prev() <= '9');
903        let mut base = Base::Decimal;
904        if first_digit == '0' {
905            // Attempt to parse encoding base.
906            match self.first() {
907                'b' => {
908                    base = Base::Binary;
909                    self.bump();
910                    if !self.eat_decimal_digits() {
911                        return Int { base, empty_int: true };
912                    }
913                }
914                'o' => {
915                    base = Base::Octal;
916                    self.bump();
917                    if !self.eat_decimal_digits() {
918                        return Int { base, empty_int: true };
919                    }
920                }
921                'x' => {
922                    base = Base::Hexadecimal;
923                    self.bump();
924                    if !self.eat_hexadecimal_digits() {
925                        return Int { base, empty_int: true };
926                    }
927                }
928                // Not a base prefix; consume additional digits.
929                '0'..='9' | '_' => {
930                    self.eat_decimal_digits();
931                }
932
933                // Also not a base prefix; nothing more to do here.
934                '.' | 'e' | 'E' => {}
935
936                // Just a 0.
937                _ => return Int { base, empty_int: false },
938            }
939        } else {
940            // No base prefix, parse number in the usual way.
941            self.eat_decimal_digits();
942        }
943
944        match self.first() {
945            // Don't be greedy if this is actually an
946            // integer literal followed by field/method access or a range pattern
947            // (`0..2` and `12.foo()`)
948            '.' if self.second() != '.' && !is_id_start(self.second()) => {
949                // might have stuff after the ., and if it does, it needs to start
950                // with a number
951                self.bump();
952                let mut empty_exponent = false;
953                if self.first().is_ascii_digit() {
954                    self.eat_decimal_digits();
955                    match self.first() {
956                        'e' | 'E' => {
957                            self.bump();
958                            empty_exponent = !self.eat_float_exponent();
959                        }
960                        _ => (),
961                    }
962                }
963                Float { base, empty_exponent }
964            }
965            'e' | 'E' => {
966                self.bump();
967                let empty_exponent = !self.eat_float_exponent();
968                Float { base, empty_exponent }
969            }
970            _ => Int { base, empty_int: false },
971        }
972    }
973
974    fn lifetime_or_char(&mut self) -> TokenKind {
975        if true {
    if !(self.prev() == '\'') {
        ::core::panicking::panic("assertion failed: self.prev() == \'\\\'\'")
    };
};debug_assert!(self.prev() == '\'');
976
977        let can_be_a_lifetime = if self.second() == '\'' {
978            // It's surely not a lifetime.
979            false
980        } else {
981            // If the first symbol is valid for identifier, it can be a lifetime.
982            // Also check if it's a number for a better error reporting (so '0 will
983            // be reported as invalid lifetime and not as unterminated char literal).
984            is_id_start(self.first()) || self.first().is_ascii_digit()
985        };
986
987        if !can_be_a_lifetime {
988            let terminated = self.single_quoted_string();
989            let suffix_start = self.pos_within_token();
990            if terminated {
991                self.eat_literal_suffix();
992            }
993            let kind = Char { terminated };
994            return Literal { kind, suffix_start };
995        }
996
997        if self.first() == 'r' && self.second() == '#' && is_id_start(self.third()) {
998            // Eat "r" and `#`, and identifier start characters.
999            self.bump();
1000            self.bump();
1001            self.bump();
1002            self.eat_while(is_id_continue);
1003            return RawLifetime;
1004        }
1005
1006        // Either a lifetime or a character literal with
1007        // length greater than 1.
1008        let starts_with_number = self.first().is_ascii_digit();
1009
1010        // Skip the literal contents.
1011        // First symbol can be a number (which isn't a valid identifier start),
1012        // so skip it without any checks.
1013        self.bump();
1014        self.eat_while(is_id_continue);
1015
1016        match self.first() {
1017            // Check if after skipping literal contents we've met a closing
1018            // single quote (which means that user attempted to create a
1019            // string with single quotes).
1020            '\'' => {
1021                self.bump();
1022                let kind = Char { terminated: true };
1023                Literal { kind, suffix_start: self.pos_within_token() }
1024            }
1025            '#' if !starts_with_number => UnknownPrefixLifetime,
1026            _ => Lifetime { starts_with_number },
1027        }
1028    }
1029
1030    fn single_quoted_string(&mut self) -> bool {
1031        if true {
    if !(self.prev() == '\'') {
        ::core::panicking::panic("assertion failed: self.prev() == \'\\\'\'")
    };
};debug_assert!(self.prev() == '\'');
1032        // Check if it's a one-symbol literal.
1033        if self.second() == '\'' && self.first() != '\\' {
1034            self.bump();
1035            self.bump();
1036            return true;
1037        }
1038
1039        // Literal has more than one symbol.
1040
1041        // Parse until either quotes are terminated or error is detected.
1042        loop {
1043            match self.first() {
1044                // Quotes are terminated, finish parsing.
1045                '\'' => {
1046                    self.bump();
1047                    return true;
1048                }
1049                // Probably beginning of the comment, which we don't want to include
1050                // to the error report.
1051                '/' => break,
1052                // Newline without following '\'' means unclosed quote, stop parsing.
1053                '\n' if self.second() != '\'' => break,
1054                // End of file, stop parsing.
1055                EOF_CHAR if self.is_eof() => break,
1056                // Escaped slash is considered one character, so bump twice.
1057                '\\' => {
1058                    self.bump();
1059                    self.bump();
1060                }
1061                // Skip the character.
1062                _ => {
1063                    self.bump();
1064                }
1065            }
1066        }
1067        // String was not terminated.
1068        false
1069    }
1070
1071    /// Eats double-quoted string and returns true
1072    /// if string is terminated.
1073    fn double_quoted_string(&mut self) -> bool {
1074        if true {
    if !(self.prev() == '"') {
        ::core::panicking::panic("assertion failed: self.prev() == \'\"\'")
    };
};debug_assert!(self.prev() == '"');
1075        while let Some(c) = self.bump() {
1076            match c {
1077                '"' => {
1078                    return true;
1079                }
1080                '\\' if self.first() == '\\' || self.first() == '"' => {
1081                    // Bump again to skip escaped character.
1082                    self.bump();
1083                }
1084                _ => (),
1085            }
1086        }
1087        // End of file reached.
1088        false
1089    }
1090
1091    /// Attempt to lex for a guarded string literal.
1092    ///
1093    /// Used by `rustc_parse::lexer` to lex for guarded strings
1094    /// conditionally based on edition.
1095    ///
1096    /// Note: this will not reset the `Cursor` when a
1097    /// guarded string is not found. It is the caller's
1098    /// responsibility to do so.
1099    pub fn guarded_double_quoted_string(&mut self) -> Option<GuardedStr> {
1100        if true {
    if !(self.prev() != '#') {
        ::core::panicking::panic("assertion failed: self.prev() != \'#\'")
    };
};debug_assert!(self.prev() != '#');
1101
1102        let mut n_start_hashes: u32 = 0;
1103        while self.first() == '#' {
1104            n_start_hashes += 1;
1105            self.bump();
1106        }
1107
1108        if self.first() != '"' {
1109            return None;
1110        }
1111        self.bump();
1112        if true {
    if !(self.prev() == '"') {
        ::core::panicking::panic("assertion failed: self.prev() == \'\"\'")
    };
};debug_assert!(self.prev() == '"');
1113
1114        // Lex the string itself as a normal string literal
1115        // so we can recover that for older editions later.
1116        let terminated = self.double_quoted_string();
1117        if !terminated {
1118            let token_len = self.pos_within_token();
1119            self.reset_pos_within_token();
1120
1121            return Some(GuardedStr { n_hashes: n_start_hashes, terminated: false, token_len });
1122        }
1123
1124        // Consume closing '#' symbols.
1125        // Note that this will not consume extra trailing `#` characters:
1126        // `###"abcde"####` is lexed as a `GuardedStr { n_end_hashes: 3, .. }`
1127        // followed by a `#` token.
1128        let mut n_end_hashes = 0;
1129        while self.first() == '#' && n_end_hashes < n_start_hashes {
1130            n_end_hashes += 1;
1131            self.bump();
1132        }
1133
1134        // Reserved syntax, always an error, so it doesn't matter if
1135        // `n_start_hashes != n_end_hashes`.
1136
1137        self.eat_literal_suffix();
1138
1139        let token_len = self.pos_within_token();
1140        self.reset_pos_within_token();
1141
1142        Some(GuardedStr { n_hashes: n_start_hashes, terminated: true, token_len })
1143    }
1144
1145    /// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
1146    fn raw_double_quoted_string(&mut self, prefix_len: u32) -> Result<u8, RawStrError> {
1147        // Wrap the actual function to handle the error with too many hashes.
1148        // This way, it eats the whole raw string.
1149        let n_hashes = self.raw_string_unvalidated(prefix_len)?;
1150        // Only up to 255 `#`s are allowed in raw strings
1151        match u8::try_from(n_hashes) {
1152            Ok(num) => Ok(num),
1153            Err(_) => Err(RawStrError::TooManyDelimiters { found: n_hashes }),
1154        }
1155    }
1156
1157    fn raw_string_unvalidated(&mut self, prefix_len: u32) -> Result<u32, RawStrError> {
1158        if true {
    if !(self.prev() == 'r') {
        ::core::panicking::panic("assertion failed: self.prev() == \'r\'")
    };
};debug_assert!(self.prev() == 'r');
1159        let start_pos = self.pos_within_token();
1160        let mut possible_terminator_offset = None;
1161        let mut max_hashes = 0;
1162
1163        // Count opening '#' symbols.
1164        let mut eaten = 0;
1165        while self.first() == '#' {
1166            eaten += 1;
1167            self.bump();
1168        }
1169        let n_start_hashes = eaten;
1170
1171        // Check that string is started.
1172        match self.bump() {
1173            Some('"') => (),
1174            c => {
1175                let c = c.unwrap_or(EOF_CHAR);
1176                return Err(RawStrError::InvalidStarter { bad_char: c });
1177            }
1178        }
1179
1180        // Skip the string contents and on each '#' character met, check if this is
1181        // a raw string termination.
1182        loop {
1183            self.eat_until(b'"');
1184
1185            if self.is_eof() {
1186                return Err(RawStrError::NoTerminator {
1187                    expected: n_start_hashes,
1188                    found: max_hashes,
1189                    possible_terminator_offset,
1190                });
1191            }
1192
1193            // Eat closing double quote.
1194            self.bump();
1195
1196            // Check that amount of closing '#' symbols
1197            // is equal to the amount of opening ones.
1198            // Note that this will not consume extra trailing `#` characters:
1199            // `r###"abcde"####` is lexed as a `RawStr { n_hashes: 3 }`
1200            // followed by a `#` token.
1201            let mut n_end_hashes = 0;
1202            while self.first() == '#' && n_end_hashes < n_start_hashes {
1203                n_end_hashes += 1;
1204                self.bump();
1205            }
1206
1207            if n_end_hashes == n_start_hashes {
1208                return Ok(n_start_hashes);
1209            } else if n_end_hashes > max_hashes {
1210                // Keep track of possible terminators to give a hint about
1211                // where there might be a missing terminator
1212                possible_terminator_offset =
1213                    Some(self.pos_within_token() - start_pos - n_end_hashes + prefix_len);
1214                max_hashes = n_end_hashes;
1215            }
1216        }
1217    }
1218
1219    fn eat_decimal_digits(&mut self) -> bool {
1220        let mut has_digits = false;
1221        loop {
1222            match self.first() {
1223                '_' => {
1224                    self.bump();
1225                }
1226                '0'..='9' => {
1227                    has_digits = true;
1228                    self.bump();
1229                }
1230                _ => break,
1231            }
1232        }
1233        has_digits
1234    }
1235
1236    fn eat_hexadecimal_digits(&mut self) -> bool {
1237        let mut has_digits = false;
1238        loop {
1239            match self.first() {
1240                '_' => {
1241                    self.bump();
1242                }
1243                '0'..='9' | 'a'..='f' | 'A'..='F' => {
1244                    has_digits = true;
1245                    self.bump();
1246                }
1247                _ => break,
1248            }
1249        }
1250        has_digits
1251    }
1252
1253    /// Eats the float exponent. Returns true if at least one digit was met,
1254    /// and returns false otherwise.
1255    fn eat_float_exponent(&mut self) -> bool {
1256        if true {
    if !(self.prev() == 'e' || self.prev() == 'E') {
        ::core::panicking::panic("assertion failed: self.prev() == \'e\' || self.prev() == \'E\'")
    };
};debug_assert!(self.prev() == 'e' || self.prev() == 'E');
1257        if self.first() == '-' || self.first() == '+' {
1258            self.bump();
1259        }
1260        self.eat_decimal_digits()
1261    }
1262
1263    // Eats the suffix of the literal, e.g. "u8".
1264    fn eat_literal_suffix(&mut self) {
1265        self.eat_identifier();
1266    }
1267
1268    // Eats the identifier. Note: succeeds on `_`, which isn't a valid
1269    // identifier.
1270    fn eat_identifier(&mut self) {
1271        if !is_id_start(self.first()) {
1272            return;
1273        }
1274        self.bump();
1275
1276        self.eat_while(is_id_continue);
1277    }
1278}