rustc_ast/
token.rs

1use std::borrow::Cow;
2use std::fmt;
3use std::sync::Arc;
4
5pub use BinOpToken::*;
6pub use LitKind::*;
7pub use Nonterminal::*;
8pub use NtExprKind::*;
9pub use NtPatKind::*;
10pub use TokenKind::*;
11use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
12use rustc_macros::{Decodable, Encodable, HashStable_Generic};
13use rustc_span::edition::Edition;
14use rustc_span::{DUMMY_SP, ErrorGuaranteed, Span, kw, sym};
15#[allow(clippy::useless_attribute)] // FIXME: following use of `hidden_glob_reexports` incorrectly triggers `useless_attribute` lint.
16#[allow(hidden_glob_reexports)]
17use rustc_span::{Ident, Symbol};
18
19use crate::ast;
20use crate::ptr::P;
21use crate::util::case::Case;
22
/// The syntactic form of a comment, as recorded in `TokenKind::DocComment`.
#[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
pub enum CommentKind {
    /// A line comment.
    Line,
    /// A block comment.
    Block,
}
28
/// The operator carried by `TokenKind::BinOp` (e.g. `+`) and
/// `TokenKind::BinOpEq` (e.g. `+=`).
#[derive(Clone, PartialEq, Encodable, Decodable, Hash, Debug, Copy)]
#[derive(HashStable_Generic)]
pub enum BinOpToken {
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `%`
    Percent,
    /// `^`
    Caret,
    /// `&`
    And,
    /// `|`
    Or,
    /// `<<`
    Shl,
    /// `>>`
    Shr,
}
43
// Records where an invisible delimiter (`Delimiter::Invisible`) came from.
//
// This type must not implement `Hash` due to the unusual `PartialEq` impl below.
#[derive(Copy, Clone, Debug, Encodable, Decodable, HashStable_Generic)]
pub enum InvisibleOrigin {
    // From the expansion of a metavariable in a declarative macro.
    MetaVar(MetaVarKind),

    // Converted from `proc_macro::Delimiter` in
    // `proc_macro::Delimiter::to_internal`, i.e. returned by a proc macro.
    ProcMacro,

    // Converted from `TokenKind::Interpolated` in
    // `TokenStream::flatten_token`. Treated similarly to `ProcMacro`.
    FlattenToken,
}
58
impl PartialEq for InvisibleOrigin {
    /// Always returns `false`: no two origins ever compare equal, not even an
    /// origin compared with itself.
    #[inline]
    fn eq(&self, _other: &InvisibleOrigin) -> bool {
        // When we had AST-based nonterminals we couldn't compare them, and the
        // old `Nonterminal` type had an `eq` that always returned false,
        // resulting in this restriction:
        // https://doc.rust-lang.org/nightly/reference/macros-by-example.html#forwarding-a-matched-fragment
        // This `eq` emulates that behaviour. We could consider lifting this
        // restriction now but there are still cases involving invisible
        // delimiters that make it harder than it first appears.
        false
    }
}
72
/// Annoyingly similar to `NonterminalKind`, but the slight differences are important.
///
/// Identifies which kind of declarative-macro metavariable an invisible
/// delimiter originates from (see `InvisibleOrigin::MetaVar`). The `Display`
/// impl prints the matching symbol, e.g. `expr` or `pat_param`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)]
pub enum MetaVarKind {
    Item,
    Block,
    Stmt,
    Pat(NtPatKind),
    Expr {
        kind: NtExprKind,
        // This field is needed for `Token::can_begin_literal_maybe_minus`.
        can_begin_literal_maybe_minus: bool,
        // This field is needed for `Token::can_begin_string_literal`.
        can_begin_string_literal: bool,
    },
    Ty,
    Ident,
    Lifetime,
    Literal,
    Meta,
    Path,
    Vis,
    TT,
}
96
97impl fmt::Display for MetaVarKind {
98    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
99        let sym = match self {
100            MetaVarKind::Item => sym::item,
101            MetaVarKind::Block => sym::block,
102            MetaVarKind::Stmt => sym::stmt,
103            MetaVarKind::Pat(PatParam { inferred: true } | PatWithOr) => sym::pat,
104            MetaVarKind::Pat(PatParam { inferred: false }) => sym::pat_param,
105            MetaVarKind::Expr { kind: Expr2021 { inferred: true } | Expr, .. } => sym::expr,
106            MetaVarKind::Expr { kind: Expr2021 { inferred: false }, .. } => sym::expr_2021,
107            MetaVarKind::Ty => sym::ty,
108            MetaVarKind::Ident => sym::ident,
109            MetaVarKind::Lifetime => sym::lifetime,
110            MetaVarKind::Literal => sym::literal,
111            MetaVarKind::Meta => sym::meta,
112            MetaVarKind::Path => sym::path,
113            MetaVarKind::Vis => sym::vis,
114            MetaVarKind::TT => sym::tt,
115        };
116        write!(f, "{sym}")
117    }
118}
119
/// Describes how a sequence of token trees is delimited.
/// Cannot use `proc_macro::Delimiter` directly because this
/// structure should implement some additional traits.
///
/// Note that the derived `PartialEq` delegates to `InvisibleOrigin`'s
/// `PartialEq`, which always returns `false`, so two `Invisible` delimiters
/// never compare equal with `==`.
#[derive(Copy, Clone, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
pub enum Delimiter {
    /// `( ... )`
    Parenthesis,
    /// `{ ... }`
    Brace,
    /// `[ ... ]`
    Bracket,
    /// `∅ ... ∅`
    /// An invisible delimiter, that may, for example, appear around tokens coming from a
    /// "macro variable" `$var`. It is important to preserve operator priorities in cases like
    /// `$var * 3` where `$var` is `1 + 2`.
    /// Invisible delimiters might not survive roundtrip of a token stream through a string.
    Invisible(InvisibleOrigin),
}
138
139impl Delimiter {
140    // Should the parser skip these delimiters? Only happens for certain kinds
141    // of invisible delimiters. Ideally this function will eventually disappear
142    // and no invisible delimiters will be skipped.
143    #[inline]
144    pub fn skip(&self) -> bool {
145        match self {
146            Delimiter::Parenthesis | Delimiter::Bracket | Delimiter::Brace => false,
147            Delimiter::Invisible(InvisibleOrigin::MetaVar(_)) => false,
148            Delimiter::Invisible(InvisibleOrigin::FlattenToken | InvisibleOrigin::ProcMacro) => {
149                true
150            }
151        }
152    }
153
154    // This exists because `InvisibleOrigin`s should be compared. It is only used for assertions.
155    pub fn eq_ignoring_invisible_origin(&self, other: &Delimiter) -> bool {
156        match (self, other) {
157            (Delimiter::Parenthesis, Delimiter::Parenthesis) => true,
158            (Delimiter::Brace, Delimiter::Brace) => true,
159            (Delimiter::Bracket, Delimiter::Bracket) => true,
160            (Delimiter::Invisible(_), Delimiter::Invisible(_)) => true,
161            _ => false,
162        }
163    }
164}
165
// Note that the suffix is *not* considered when deciding the `LitKind` in this
// type. This means that float literals like `1f32` are classified by this type
// as `Integer`. Only upon conversion to `ast::LitKind` will such a literal be
// given the `Float` kind.
#[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
pub enum LitKind {
    Bool, // AST only, must never appear in a `Token`
    Byte,
    Char,
    Integer, // e.g. `1`, `1u8`, `1f32`
    Float,   // e.g. `1.`, `1.0`, `1e3f32`
    Str,
    StrRaw(u8), // raw string delimited by `n` hash symbols
    ByteStr,
    ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols
    CStr,
    CStrRaw(u8), // raw C string delimited by `n` hash symbols
    Err(ErrorGuaranteed),
}
185
/// A literal token.
#[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
pub struct Lit {
    /// The lexical kind of the literal; the suffix is not taken into account.
    pub kind: LitKind,
    /// The literal's text. The `Display` impl adds the prefix, quotes and
    /// raw-string hashes around it, so it does not contain them itself.
    pub symbol: Symbol,
    /// An optional suffix; the `Display` impl prints it directly after the
    /// literal body.
    pub suffix: Option<Symbol>,
}
193
194impl Lit {
195    pub fn new(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> Lit {
196        Lit { kind, symbol, suffix }
197    }
198
199    /// Returns `true` if this is semantically a float literal. This includes
200    /// ones like `1f32` that have an `Integer` kind but a float suffix.
201    pub fn is_semantic_float(&self) -> bool {
202        match self.kind {
203            LitKind::Float => true,
204            LitKind::Integer => match self.suffix {
205                Some(sym) => sym == sym::f32 || sym == sym::f64,
206                None => false,
207            },
208            _ => false,
209        }
210    }
211
212    /// Keep this in sync with `Token::can_begin_literal_maybe_minus` excluding unary negation.
213    pub fn from_token(token: &Token) -> Option<Lit> {
214        match token.uninterpolate().kind {
215            Ident(name, IdentIsRaw::No) if name.is_bool_lit() => Some(Lit::new(Bool, name, None)),
216            Literal(token_lit) => Some(token_lit),
217            Interpolated(ref nt)
218                if let NtExpr(expr) | NtLiteral(expr) = &**nt
219                    && let ast::ExprKind::Lit(token_lit) = expr.kind =>
220            {
221                Some(token_lit)
222            }
223            _ => None,
224        }
225    }
226}
227
228impl fmt::Display for Lit {
229    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
230        let Lit { kind, symbol, suffix } = *self;
231        match kind {
232            Byte => write!(f, "b'{symbol}'")?,
233            Char => write!(f, "'{symbol}'")?,
234            Str => write!(f, "\"{symbol}\"")?,
235            StrRaw(n) => write!(
236                f,
237                "r{delim}\"{string}\"{delim}",
238                delim = "#".repeat(n as usize),
239                string = symbol
240            )?,
241            ByteStr => write!(f, "b\"{symbol}\"")?,
242            ByteStrRaw(n) => write!(
243                f,
244                "br{delim}\"{string}\"{delim}",
245                delim = "#".repeat(n as usize),
246                string = symbol
247            )?,
248            CStr => write!(f, "c\"{symbol}\"")?,
249            CStrRaw(n) => {
250                write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))?
251            }
252            Integer | Float | Bool | Err(_) => write!(f, "{symbol}")?,
253        }
254
255        if let Some(suffix) = suffix {
256            write!(f, "{suffix}")?;
257        }
258
259        Ok(())
260    }
261}
262
263impl LitKind {
264    /// An English article for the literal token kind.
265    pub fn article(self) -> &'static str {
266        match self {
267            Integer | Err(_) => "an",
268            _ => "a",
269        }
270    }
271
272    pub fn descr(self) -> &'static str {
273        match self {
274            Bool => "boolean",
275            Byte => "byte",
276            Char => "char",
277            Integer => "integer",
278            Float => "float",
279            Str | StrRaw(..) => "string",
280            ByteStr | ByteStrRaw(..) => "byte string",
281            CStr | CStrRaw(..) => "C string",
282            Err(_) => "error",
283        }
284    }
285
286    pub(crate) fn may_have_suffix(self) -> bool {
287        matches!(self, Integer | Float | Err(_))
288    }
289}
290
291pub fn ident_can_begin_expr(name: Symbol, span: Span, is_raw: IdentIsRaw) -> bool {
292    let ident_token = Token::new(Ident(name, is_raw), span);
293
294    !ident_token.is_reserved_ident()
295        || ident_token.is_path_segment_keyword()
296        || [
297            kw::Async,
298            kw::Do,
299            kw::Box,
300            kw::Break,
301            kw::Const,
302            kw::Continue,
303            kw::False,
304            kw::For,
305            kw::Gen,
306            kw::If,
307            kw::Let,
308            kw::Loop,
309            kw::Match,
310            kw::Move,
311            kw::Return,
312            kw::True,
313            kw::Try,
314            kw::Unsafe,
315            kw::While,
316            kw::Yield,
317            kw::Safe,
318            kw::Static,
319        ]
320        .contains(&name)
321}
322
323fn ident_can_begin_type(name: Symbol, span: Span, is_raw: IdentIsRaw) -> bool {
324    let ident_token = Token::new(Ident(name, is_raw), span);
325
326    !ident_token.is_reserved_ident()
327        || ident_token.is_path_segment_keyword()
328        || [kw::Underscore, kw::For, kw::Impl, kw::Fn, kw::Unsafe, kw::Extern, kw::Typeof, kw::Dyn]
329            .contains(&name)
330}
331
/// Whether an identifier or lifetime token is raw. Converts to and from
/// `bool` via the `From` impls below (`Yes` <-> `true`).
#[derive(PartialEq, Encodable, Decodable, Debug, Copy, Clone, HashStable_Generic)]
pub enum IdentIsRaw {
    No,
    Yes,
}
337
338impl From<bool> for IdentIsRaw {
339    fn from(b: bool) -> Self {
340        if b { Self::Yes } else { Self::No }
341    }
342}
343
344impl From<IdentIsRaw> for bool {
345    fn from(is_raw: IdentIsRaw) -> bool {
346        matches!(is_raw, IdentIsRaw::Yes)
347    }
348}
349
// SAFETY: due to the `Clone` impl below, all fields of all variants other than
// `Interpolated` must impl `Copy`.
/// The kind of a token, without its span (see `Token`).
#[derive(PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
pub enum TokenKind {
    /* Expression-operator symbols. */
    /// `=`
    Eq,
    /// `<`
    Lt,
    /// `<=`
    Le,
    /// `==`
    EqEq,
    /// `!=`
    Ne,
    /// `>=`
    Ge,
    /// `>`
    Gt,
    /// `&&`
    AndAnd,
    /// `||`
    OrOr,
    /// `!`
    Not,
    /// `~`
    Tilde,
    /// A binary operator such as `+` or `<<`; see `BinOpToken`.
    BinOp(BinOpToken),
    /// A binary operator followed by `=`, such as `+=` or `<<=`.
    BinOpEq(BinOpToken),

    /* Structural symbols */
    /// `@`
    At,
    /// `.`
    Dot,
    /// `..`
    DotDot,
    /// `...`
    DotDotDot,
    /// `..=`
    DotDotEq,
    /// `,`
    Comma,
    /// `;`
    Semi,
    /// `:`
    Colon,
    /// `::`
    PathSep,
    /// `->`
    RArrow,
    /// `<-`
    LArrow,
    /// `=>`
    FatArrow,
    /// `#`
    Pound,
    /// `$`
    Dollar,
    /// `?`
    Question,
    /// Used by proc macros for representing lifetimes, not generated by lexer right now.
    SingleQuote,
    /// An opening delimiter (e.g., `{`).
    OpenDelim(Delimiter),
    /// A closing delimiter (e.g., `}`).
    CloseDelim(Delimiter),

    /* Literals */
    /// A literal token; see `Lit`.
    Literal(Lit),

    /// Identifier token.
    /// Do not forget about `NtIdent` when you want to match on identifiers.
    /// It's recommended to use `Token::(ident,uninterpolate,uninterpolated_span)` to
    /// treat regular and interpolated identifiers in the same way.
    Ident(Symbol, IdentIsRaw),
    /// This identifier (and its span) is the identifier passed to the
    /// declarative macro. The span in the surrounding `Token` is the span of
    /// the `ident` metavariable in the macro's RHS.
    NtIdent(Ident, IdentIsRaw),

    /// Lifetime identifier token.
    /// Do not forget about `NtLifetime` when you want to match on lifetime identifiers.
    /// It's recommended to use `Token::(lifetime,uninterpolate,uninterpolated_span)` to
    /// treat regular and interpolated lifetime identifiers in the same way.
    Lifetime(Symbol, IdentIsRaw),
    /// This identifier (and its span) is the lifetime passed to the
    /// declarative macro. The span in the surrounding `Token` is the span of
    /// the `lifetime` metavariable in the macro's RHS.
    NtLifetime(Ident, IdentIsRaw),

    /// An embedded AST node, as produced by a macro. This only exists for
    /// historical reasons. We'd like to get rid of it, for multiple reasons.
    /// - It's conceptually very strange. Saying a token can contain an AST
    ///   node is like saying, in natural language, that a word can contain a
    ///   sentence.
    /// - It requires special handling in a bunch of places in the parser.
    /// - It prevents `Token` from implementing `Copy`.
    ///
    /// It adds complexity and likely slows things down. Please don't add new
    /// occurrences of this token kind!
    ///
    /// The span in the surrounding `Token` is that of the metavariable in the
    /// macro's RHS. The span within the Nonterminal is that of the fragment
    /// passed to the macro at the call site.
    Interpolated(Arc<Nonterminal>),

    /// A doc comment token.
    /// `Symbol` is the doc comment's data excluding its "quotes" (`///`, `/**`, etc)
    /// similarly to symbols in string literal tokens.
    DocComment(CommentKind, ast::AttrStyle, Symbol),

    /// End Of File
    Eof,
}
464
impl Clone for TokenKind {
    /// Clones the token kind: `Interpolated` clones its `Arc`, every other
    /// variant is duplicated with a bitwise read.
    fn clone(&self) -> Self {
        // `TokenKind` would impl `Copy` if it weren't for `Interpolated`. So
        // for all other variants, this implementation of `clone` is just like
        // a copy. This is faster than the `derive(Clone)` version which has a
        // separate path for every variant.
        match self {
            Interpolated(nt) => Interpolated(Arc::clone(nt)),
            // SAFETY: this arm is only reached for variants other than
            // `Interpolated`, and the comment on the `TokenKind` definition
            // requires all of their fields to be `Copy`, so a bitwise read
            // yields an independent, valid value.
            _ => unsafe { std::ptr::read(self) },
        }
    }
}
477
/// A token: a `TokenKind` together with the `Span` it is associated with.
#[derive(Clone, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
pub struct Token {
    /// What kind of token this is.
    pub kind: TokenKind,
    /// The span of the token. For `NtIdent`, `NtLifetime` and `Interpolated`
    /// tokens this is the span of the metavariable in the macro's RHS (see
    /// the docs on those variants); use `Token::uninterpolated_span` to get
    /// the span of the underlying fragment.
    pub span: Span,
}
483
484impl TokenKind {
485    pub fn lit(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> TokenKind {
486        Literal(Lit::new(kind, symbol, suffix))
487    }
488
489    /// An approximation to proc-macro-style single-character operators used by
490    /// rustc parser. If the operator token can be broken into two tokens, the
491    /// first of which has `n` (1 or 2) chars, then this function performs that
492    /// operation, otherwise it returns `None`.
493    pub fn break_two_token_op(&self, n: u32) -> Option<(TokenKind, TokenKind)> {
494        assert!(n == 1 || n == 2);
495        Some(match (self, n) {
496            (Le, 1) => (Lt, Eq),
497            (EqEq, 1) => (Eq, Eq),
498            (Ne, 1) => (Not, Eq),
499            (Ge, 1) => (Gt, Eq),
500            (AndAnd, 1) => (BinOp(And), BinOp(And)),
501            (OrOr, 1) => (BinOp(Or), BinOp(Or)),
502            (BinOp(Shl), 1) => (Lt, Lt),
503            (BinOp(Shr), 1) => (Gt, Gt),
504            (BinOpEq(Plus), 1) => (BinOp(Plus), Eq),
505            (BinOpEq(Minus), 1) => (BinOp(Minus), Eq),
506            (BinOpEq(Star), 1) => (BinOp(Star), Eq),
507            (BinOpEq(Slash), 1) => (BinOp(Slash), Eq),
508            (BinOpEq(Percent), 1) => (BinOp(Percent), Eq),
509            (BinOpEq(Caret), 1) => (BinOp(Caret), Eq),
510            (BinOpEq(And), 1) => (BinOp(And), Eq),
511            (BinOpEq(Or), 1) => (BinOp(Or), Eq),
512            (BinOpEq(Shl), 1) => (Lt, Le),         // `<` + `<=`
513            (BinOpEq(Shl), 2) => (BinOp(Shl), Eq), // `<<` + `=`
514            (BinOpEq(Shr), 1) => (Gt, Ge),         // `>` + `>=`
515            (BinOpEq(Shr), 2) => (BinOp(Shr), Eq), // `>>` + `=`
516            (DotDot, 1) => (Dot, Dot),
517            (DotDotDot, 1) => (Dot, DotDot), // `.` + `..`
518            (DotDotDot, 2) => (DotDot, Dot), // `..` + `.`
519            (DotDotEq, 2) => (DotDot, Eq),
520            (PathSep, 1) => (Colon, Colon),
521            (RArrow, 1) => (BinOp(Minus), Gt),
522            (LArrow, 1) => (Lt, BinOp(Minus)),
523            (FatArrow, 1) => (Eq, Gt),
524            _ => return None,
525        })
526    }
527
528    /// Returns tokens that are likely to be typed accidentally instead of the current token.
529    /// Enables better error recovery when the wrong token is found.
530    pub fn similar_tokens(&self) -> &[TokenKind] {
531        match self {
532            Comma => &[Dot, Lt, Semi],
533            Semi => &[Colon, Comma],
534            Colon => &[Semi],
535            FatArrow => &[Eq, RArrow, Ge, Gt],
536            _ => &[],
537        }
538    }
539
540    pub fn should_end_const_arg(&self) -> bool {
541        matches!(self, Gt | Ge | BinOp(Shr) | BinOpEq(Shr))
542    }
543}
544
545impl Token {
546    pub fn new(kind: TokenKind, span: Span) -> Self {
547        Token { kind, span }
548    }
549
550    /// Some token that will be thrown away later.
551    pub fn dummy() -> Self {
552        Token::new(TokenKind::Question, DUMMY_SP)
553    }
554
555    /// Recovers a `Token` from an `Ident`. This creates a raw identifier if necessary.
556    pub fn from_ast_ident(ident: Ident) -> Self {
557        Token::new(Ident(ident.name, ident.is_raw_guess().into()), ident.span)
558    }
559
560    /// For interpolated tokens, returns a span of the fragment to which the interpolated
561    /// token refers. For all other tokens this is just a regular span.
562    /// It is particularly important to use this for identifiers and lifetimes
563    /// for which spans affect name resolution and edition checks.
564    /// Note that keywords are also identifiers, so they should use this
565    /// if they keep spans or perform edition checks.
566    pub fn uninterpolated_span(&self) -> Span {
567        match self.kind {
568            NtIdent(ident, _) | NtLifetime(ident, _) => ident.span,
569            Interpolated(ref nt) => nt.use_span(),
570            _ => self.span,
571        }
572    }
573
574    pub fn is_range_separator(&self) -> bool {
575        [DotDot, DotDotDot, DotDotEq].contains(&self.kind)
576    }
577
578    pub fn is_punct(&self) -> bool {
579        match self.kind {
580            Eq | Lt | Le | EqEq | Ne | Ge | Gt | AndAnd | OrOr | Not | Tilde | BinOp(_)
581            | BinOpEq(_) | At | Dot | DotDot | DotDotDot | DotDotEq | Comma | Semi | Colon
582            | PathSep | RArrow | LArrow | FatArrow | Pound | Dollar | Question | SingleQuote => {
583                true
584            }
585
586            OpenDelim(..) | CloseDelim(..) | Literal(..) | DocComment(..) | Ident(..)
587            | NtIdent(..) | Lifetime(..) | NtLifetime(..) | Interpolated(..) | Eof => false,
588        }
589    }
590
591    pub fn is_like_plus(&self) -> bool {
592        matches!(self.kind, BinOp(Plus) | BinOpEq(Plus))
593    }
594
    /// Returns `true` if the token can appear at the start of an expression.
    ///
    /// **NB**: Take care when modifying this function, since it will change
    /// the stable set of tokens that are allowed to match an expr nonterminal.
    pub fn can_begin_expr(&self) -> bool {
        use Delimiter::*;
        // `uninterpolate` turns `NtIdent`/`NtLifetime` into plain
        // `Ident`/`Lifetime` tokens so they are handled by the arms below.
        match self.uninterpolate().kind {
            Ident(name, is_raw)              =>
                ident_can_begin_expr(name, self.span, is_raw), // value name or keyword
            OpenDelim(Parenthesis | Brace | Bracket) | // tuple, array or block
            Literal(..)                       | // literal
            Not                               | // operator not
            BinOp(Minus)                      | // unary minus
            BinOp(Star)                       | // dereference
            BinOp(Or) | OrOr                  | // closure
            BinOp(And)                        | // reference
            AndAnd                            | // double reference
            // DotDotDot is no longer supported, but we need some way to display the error
            DotDot | DotDotDot | DotDotEq     | // range notation
            Lt | BinOp(Shl)                   | // associated path
            PathSep                           | // global path
            Lifetime(..)                      | // labeled loop
            Pound                             => true, // expression attributes
            // Interpolated AST fragments: blocks, expressions, literals and paths.
            Interpolated(ref nt) =>
                matches!(&**nt,
                    NtBlock(..)   |
                    NtExpr(..)    |
                    NtLiteral(..) |
                    NtPath(..)
                ),
            // Invisible delimiters from a metavariable of one of the same kinds.
            OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(
                MetaVarKind::Block |
                MetaVarKind::Expr { .. } |
                MetaVarKind::Literal |
                MetaVarKind::Path
            ))) => true,
            _ => false,
        }
    }
634
    /// Returns `true` if the token can appear at the start of a pattern.
    ///
    /// `pat_kind` decides whether a leading `|` is accepted: it is only
    /// allowed for `PatWithOr`.
    ///
    /// Shamelessly borrowed from `can_begin_expr`, only used for diagnostics right now.
    pub fn can_begin_pattern(&self, pat_kind: NtPatKind) -> bool {
        match &self.uninterpolate().kind {
            // box, ref, mut, and other identifiers (could be made stricter)
            Ident(..) | NtIdent(..) |
            OpenDelim(Delimiter::Parenthesis) |  // tuple pattern
            OpenDelim(Delimiter::Bracket) |      // slice pattern
            BinOp(And) |                  // reference
            BinOp(Minus) |                // negative literal
            AndAnd |                      // double reference
            Literal(_) |                  // literal
            DotDot |                      // range pattern (future compat)
            DotDotDot |                   // range pattern (future compat)
            PathSep |                     // path
            Lt |                          // path (UFCS constant)
            BinOp(Shl) => true,           // path (double UFCS)
            // leading vert `|` or-pattern
            BinOp(Or) => matches!(pat_kind, PatWithOr),
            // Interpolated AST fragments accepted at the start of a pattern.
            Interpolated(nt) =>
                matches!(&**nt,
                    | NtExpr(..)
                    | NtLiteral(..)
                    | NtMeta(..)
                    | NtPat(..)
                    | NtPath(..)
                    | NtTy(..)
                ),
            // Invisible delimiters from a metavariable of one of the same kinds.
            OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(
                MetaVarKind::Expr { .. } |
                MetaVarKind::Literal |
                MetaVarKind::Meta |
                MetaVarKind::Pat(_) |
                MetaVarKind::Path |
                MetaVarKind::Ty
            ))) => true,
            _ => false,
        }
    }
675
    /// Returns `true` if the token can appear at the start of a type.
    pub fn can_begin_type(&self) -> bool {
        // `uninterpolate` turns `NtIdent`/`NtLifetime` into plain
        // `Ident`/`Lifetime` tokens so they are handled by the arms below.
        match self.uninterpolate().kind {
            Ident(name, is_raw)        =>
                ident_can_begin_type(name, self.span, is_raw), // type name or keyword
            OpenDelim(Delimiter::Parenthesis) | // tuple
            OpenDelim(Delimiter::Bracket)     | // array
            Not                         | // never
            BinOp(Star)                 | // raw pointer
            BinOp(And)                  | // reference
            AndAnd                      | // double reference
            Question                    | // maybe bound in trait object
            Lifetime(..)                | // lifetime bound in trait object
            Lt | BinOp(Shl)             | // associated path
            PathSep                      => true, // global path
            // Interpolated type or path fragments.
            Interpolated(ref nt) => matches!(&**nt, NtTy(..) | NtPath(..)),
            // Invisible delimiters from a `ty` or `path` metavariable.
            OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(
                MetaVarKind::Ty |
                MetaVarKind::Path
            ))) => true,
            // For anonymous structs or unions, which only appear in specific positions
            // (type of struct fields or union fields), we don't consider them as regular types
            _ => false,
        }
    }
701
702    /// Returns `true` if the token can appear at the start of a const param.
703    pub fn can_begin_const_arg(&self) -> bool {
704        match self.kind {
705            OpenDelim(Delimiter::Brace) | Literal(..) | BinOp(Minus) => true,
706            Ident(name, IdentIsRaw::No) if name.is_bool_lit() => true,
707            Interpolated(ref nt) => matches!(&**nt, NtExpr(..) | NtBlock(..) | NtLiteral(..)),
708            OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(
709                MetaVarKind::Expr { .. } | MetaVarKind::Block | MetaVarKind::Literal,
710            ))) => true,
711            _ => false,
712        }
713    }
714
715    /// Returns `true` if the token can appear at the start of an item.
716    pub fn can_begin_item(&self) -> bool {
717        match self.kind {
718            Ident(name, _) => [
719                kw::Fn,
720                kw::Use,
721                kw::Struct,
722                kw::Enum,
723                kw::Pub,
724                kw::Trait,
725                kw::Extern,
726                kw::Impl,
727                kw::Unsafe,
728                kw::Const,
729                kw::Safe,
730                kw::Static,
731                kw::Union,
732                kw::Macro,
733                kw::Mod,
734                kw::Type,
735            ]
736            .contains(&name),
737            _ => false,
738        }
739    }
740
741    /// Returns `true` if the token is any literal.
742    pub fn is_lit(&self) -> bool {
743        matches!(self.kind, Literal(..))
744    }
745
    /// Returns `true` if the token is any literal, a minus (which can prefix a literal,
    /// for example `-42`), or one of the boolean idents.
    ///
    /// In other words, would this token be a valid start of `parse_literal_maybe_minus`?
    ///
    /// Keep this in sync with `Lit::from_token`, excluding unary negation.
    pub fn can_begin_literal_maybe_minus(&self) -> bool {
        match self.uninterpolate().kind {
            Literal(..) | BinOp(Minus) => true,
            Ident(name, IdentIsRaw::No) if name.is_bool_lit() => true,
            Interpolated(ref nt) => match &**nt {
                NtLiteral(_) => true,
                // An interpolated expression counts if it is a literal or a
                // negated literal.
                NtExpr(e) => match &e.kind {
                    ast::ExprKind::Lit(_) => true,
                    ast::ExprKind::Unary(ast::UnOp::Neg, e) => {
                        matches!(&e.kind, ast::ExprKind::Lit(_))
                    }
                    _ => false,
                },
                _ => false,
            },
            // For invisible-delimited metavariables the answer is carried by
            // the flag stored in `MetaVarKind::Expr`.
            OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(mv_kind))) => match mv_kind {
                MetaVarKind::Literal => true,
                MetaVarKind::Expr { can_begin_literal_maybe_minus, .. } => {
                    can_begin_literal_maybe_minus
                }
                _ => false,
            },
            _ => false,
        }
    }
777
778    pub fn can_begin_string_literal(&self) -> bool {
779        match self.uninterpolate().kind {
780            Literal(..) => true,
781            Interpolated(ref nt) => match &**nt {
782                NtLiteral(_) => true,
783                NtExpr(e) => match &e.kind {
784                    ast::ExprKind::Lit(_) => true,
785                    _ => false,
786                },
787                _ => false,
788            },
789            OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(mv_kind))) => match mv_kind {
790                MetaVarKind::Literal => true,
791                MetaVarKind::Expr { can_begin_string_literal, .. } => can_begin_string_literal,
792                _ => false,
793            },
794            _ => false,
795        }
796    }
797
798    /// A convenience function for matching on identifiers during parsing.
799    /// Turns interpolated identifier (`$i: ident`) or lifetime (`$l: lifetime`) token
800    /// into the regular identifier or lifetime token it refers to,
801    /// otherwise returns the original token.
802    pub fn uninterpolate(&self) -> Cow<'_, Token> {
803        match self.kind {
804            NtIdent(ident, is_raw) => Cow::Owned(Token::new(Ident(ident.name, is_raw), ident.span)),
805            NtLifetime(ident, is_raw) => {
806                Cow::Owned(Token::new(Lifetime(ident.name, is_raw), ident.span))
807            }
808            _ => Cow::Borrowed(self),
809        }
810    }
811
812    /// Returns an identifier if this token is an identifier.
813    #[inline]
814    pub fn ident(&self) -> Option<(Ident, IdentIsRaw)> {
815        // We avoid using `Token::uninterpolate` here because it's slow.
816        match self.kind {
817            Ident(name, is_raw) => Some((Ident::new(name, self.span), is_raw)),
818            NtIdent(ident, is_raw) => Some((ident, is_raw)),
819            _ => None,
820        }
821    }
822
823    /// Returns a lifetime identifier if this token is a lifetime.
824    #[inline]
825    pub fn lifetime(&self) -> Option<(Ident, IdentIsRaw)> {
826        // We avoid using `Token::uninterpolate` here because it's slow.
827        match self.kind {
828            Lifetime(name, is_raw) => Some((Ident::new(name, self.span), is_raw)),
829            NtLifetime(ident, is_raw) => Some((ident, is_raw)),
830            _ => None,
831        }
832    }
833
834    /// Returns `true` if the token is an identifier.
835    pub fn is_ident(&self) -> bool {
836        self.ident().is_some()
837    }
838
839    /// Returns `true` if the token is a lifetime.
840    pub fn is_lifetime(&self) -> bool {
841        self.lifetime().is_some()
842    }
843
844    /// Returns `true` if the token is an identifier whose name is the given
845    /// string slice.
846    pub fn is_ident_named(&self, name: Symbol) -> bool {
847        self.ident().is_some_and(|(ident, _)| ident.name == name)
848    }
849
850    /// Returns `true` if the token is an interpolated path.
851    fn is_whole_path(&self) -> bool {
852        if let Interpolated(nt) = &self.kind
853            && let NtPath(..) = &**nt
854        {
855            return true;
856        }
857
858        false
859    }
860
861    /// Is this a pre-parsed expression dropped into the token stream
862    /// (which happens while parsing the result of macro expansion)?
863    pub fn is_whole_expr(&self) -> bool {
864        if let Interpolated(nt) = &self.kind
865            && let NtExpr(_) | NtLiteral(_) | NtPath(_) | NtBlock(_) = &**nt
866        {
867            return true;
868        }
869
870        false
871    }
872
873    /// Is the token an interpolated block (`$b:block`)?
874    pub fn is_whole_block(&self) -> bool {
875        if let Interpolated(nt) = &self.kind
876            && let NtBlock(..) = &**nt
877        {
878            return true;
879        }
880
881        false
882    }
883
884    /// Returns `true` if the token is either the `mut` or `const` keyword.
885    pub fn is_mutability(&self) -> bool {
886        self.is_keyword(kw::Mut) || self.is_keyword(kw::Const)
887    }
888
889    pub fn is_qpath_start(&self) -> bool {
890        self == &Lt || self == &BinOp(Shl)
891    }
892
893    pub fn is_path_start(&self) -> bool {
894        self == &PathSep
895            || self.is_qpath_start()
896            || self.is_whole_path()
897            || self.is_path_segment_keyword()
898            || self.is_ident() && !self.is_reserved_ident()
899    }
900
901    /// Returns `true` if the token is a given keyword, `kw`.
902    pub fn is_keyword(&self, kw: Symbol) -> bool {
903        self.is_non_raw_ident_where(|id| id.name == kw)
904    }
905
906    /// Returns `true` if the token is a given keyword, `kw` or if `case` is `Insensitive` and this
907    /// token is an identifier equal to `kw` ignoring the case.
908    pub fn is_keyword_case(&self, kw: Symbol, case: Case) -> bool {
909        self.is_keyword(kw)
910            || (case == Case::Insensitive
911                && self.is_non_raw_ident_where(|id| {
912                    // Do an ASCII case-insensitive match, because all keywords are ASCII.
913                    id.name.as_str().eq_ignore_ascii_case(kw.as_str())
914                }))
915    }
916
917    pub fn is_path_segment_keyword(&self) -> bool {
918        self.is_non_raw_ident_where(Ident::is_path_segment_keyword)
919    }
920
921    /// Don't use this unless you're doing something very loose and heuristic-y.
922    pub fn is_any_keyword(&self) -> bool {
923        self.is_non_raw_ident_where(Ident::is_any_keyword)
924    }
925
926    /// Returns true for reserved identifiers used internally for elided lifetimes,
927    /// unnamed method parameters, crate root module, error recovery etc.
928    pub fn is_special_ident(&self) -> bool {
929        self.is_non_raw_ident_where(Ident::is_special)
930    }
931
932    /// Returns `true` if the token is a keyword used in the language.
933    pub fn is_used_keyword(&self) -> bool {
934        self.is_non_raw_ident_where(Ident::is_used_keyword)
935    }
936
937    /// Returns `true` if the token is a keyword reserved for possible future use.
938    pub fn is_unused_keyword(&self) -> bool {
939        self.is_non_raw_ident_where(Ident::is_unused_keyword)
940    }
941
942    /// Returns `true` if the token is either a special identifier or a keyword.
943    pub fn is_reserved_ident(&self) -> bool {
944        self.is_non_raw_ident_where(Ident::is_reserved)
945    }
946
947    /// Returns `true` if the token is the identifier `true` or `false`.
948    pub fn is_bool_lit(&self) -> bool {
949        self.is_non_raw_ident_where(|id| id.name.is_bool_lit())
950    }
951
952    pub fn is_numeric_lit(&self) -> bool {
953        matches!(
954            self.kind,
955            Literal(Lit { kind: LitKind::Integer, .. }) | Literal(Lit { kind: LitKind::Float, .. })
956        )
957    }
958
959    /// Returns `true` if the token is the integer literal.
960    pub fn is_integer_lit(&self) -> bool {
961        matches!(self.kind, Literal(Lit { kind: LitKind::Integer, .. }))
962    }
963
964    /// Returns `true` if the token is a non-raw identifier for which `pred` holds.
965    pub fn is_non_raw_ident_where(&self, pred: impl FnOnce(Ident) -> bool) -> bool {
966        match self.ident() {
967            Some((id, IdentIsRaw::No)) => pred(id),
968            _ => false,
969        }
970    }
971
972    pub fn glue(&self, joint: &Token) -> Option<Token> {
973        let kind = match self.kind {
974            Eq => match joint.kind {
975                Eq => EqEq,
976                Gt => FatArrow,
977                _ => return None,
978            },
979            Lt => match joint.kind {
980                Eq => Le,
981                Lt => BinOp(Shl),
982                Le => BinOpEq(Shl),
983                BinOp(Minus) => LArrow,
984                _ => return None,
985            },
986            Gt => match joint.kind {
987                Eq => Ge,
988                Gt => BinOp(Shr),
989                Ge => BinOpEq(Shr),
990                _ => return None,
991            },
992            Not => match joint.kind {
993                Eq => Ne,
994                _ => return None,
995            },
996            BinOp(op) => match joint.kind {
997                Eq => BinOpEq(op),
998                BinOp(And) if op == And => AndAnd,
999                BinOp(Or) if op == Or => OrOr,
1000                Gt if op == Minus => RArrow,
1001                _ => return None,
1002            },
1003            Dot => match joint.kind {
1004                Dot => DotDot,
1005                DotDot => DotDotDot,
1006                _ => return None,
1007            },
1008            DotDot => match joint.kind {
1009                Dot => DotDotDot,
1010                Eq => DotDotEq,
1011                _ => return None,
1012            },
1013            Colon => match joint.kind {
1014                Colon => PathSep,
1015                _ => return None,
1016            },
1017            SingleQuote => match joint.kind {
1018                Ident(name, is_raw) => Lifetime(Symbol::intern(&format!("'{name}")), is_raw),
1019                _ => return None,
1020            },
1021
1022            Le | EqEq | Ne | Ge | AndAnd | OrOr | Tilde | BinOpEq(..) | At | DotDotDot
1023            | DotDotEq | Comma | Semi | PathSep | RArrow | LArrow | FatArrow | Pound | Dollar
1024            | Question | OpenDelim(..) | CloseDelim(..) | Literal(..) | Ident(..) | NtIdent(..)
1025            | Lifetime(..) | NtLifetime(..) | Interpolated(..) | DocComment(..) | Eof => {
1026                return None;
1027            }
1028        };
1029
1030        Some(Token::new(kind, self.span.to(joint.span)))
1031    }
1032}
1033
1034impl PartialEq<TokenKind> for Token {
1035    #[inline]
1036    fn eq(&self, rhs: &TokenKind) -> bool {
1037        self.kind == *rhs
1038    }
1039}
1040
1041#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)]
1042pub enum NtPatKind {
1043    // Matches or-patterns. Was written using `pat` in edition 2021 or later.
1044    PatWithOr,
1045    // Doesn't match or-patterns.
1046    // - `inferred`: was written using `pat` in edition 2015 or 2018.
1047    // - `!inferred`: was written using `pat_param`.
1048    PatParam { inferred: bool },
1049}
1050
1051#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)]
1052pub enum NtExprKind {
1053    // Matches expressions using the post-edition 2024. Was written using
1054    // `expr` in edition 2024 or later.
1055    Expr,
1056    // Matches expressions using the pre-edition 2024 rules.
1057    // - `inferred`: was written using `expr` in edition 2021 or earlier.
1058    // - `!inferred`: was written using `expr_2021`.
1059    Expr2021 { inferred: bool },
1060}
1061
1062#[derive(Clone, Encodable, Decodable)]
1063/// For interpolation during macro expansion.
1064pub enum Nonterminal {
1065    NtItem(P<ast::Item>),
1066    NtBlock(P<ast::Block>),
1067    NtStmt(P<ast::Stmt>),
1068    NtPat(P<ast::Pat>),
1069    NtExpr(P<ast::Expr>),
1070    NtTy(P<ast::Ty>),
1071    NtLiteral(P<ast::Expr>),
1072    /// Stuff inside brackets for attributes
1073    NtMeta(P<ast::AttrItem>),
1074    NtPath(P<ast::Path>),
1075    NtVis(P<ast::Visibility>),
1076}
1077
1078#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)]
1079pub enum NonterminalKind {
1080    Item,
1081    Block,
1082    Stmt,
1083    Pat(NtPatKind),
1084    Expr(NtExprKind),
1085    Ty,
1086    Ident,
1087    Lifetime,
1088    Literal,
1089    Meta,
1090    Path,
1091    Vis,
1092    TT,
1093}
1094
1095impl NonterminalKind {
1096    /// The `edition` closure is used to get the edition for the given symbol. Doing
1097    /// `span.edition()` is expensive, so we do it lazily.
1098    pub fn from_symbol(
1099        symbol: Symbol,
1100        edition: impl FnOnce() -> Edition,
1101    ) -> Option<NonterminalKind> {
1102        Some(match symbol {
1103            sym::item => NonterminalKind::Item,
1104            sym::block => NonterminalKind::Block,
1105            sym::stmt => NonterminalKind::Stmt,
1106            sym::pat => {
1107                if edition().at_least_rust_2021() {
1108                    NonterminalKind::Pat(PatWithOr)
1109                } else {
1110                    NonterminalKind::Pat(PatParam { inferred: true })
1111                }
1112            }
1113            sym::pat_param => NonterminalKind::Pat(PatParam { inferred: false }),
1114            sym::expr => {
1115                if edition().at_least_rust_2024() {
1116                    NonterminalKind::Expr(Expr)
1117                } else {
1118                    NonterminalKind::Expr(Expr2021 { inferred: true })
1119                }
1120            }
1121            sym::expr_2021 => NonterminalKind::Expr(Expr2021 { inferred: false }),
1122            sym::ty => NonterminalKind::Ty,
1123            sym::ident => NonterminalKind::Ident,
1124            sym::lifetime => NonterminalKind::Lifetime,
1125            sym::literal => NonterminalKind::Literal,
1126            sym::meta => NonterminalKind::Meta,
1127            sym::path => NonterminalKind::Path,
1128            sym::vis => NonterminalKind::Vis,
1129            sym::tt => NonterminalKind::TT,
1130            _ => return None,
1131        })
1132    }
1133
1134    fn symbol(self) -> Symbol {
1135        match self {
1136            NonterminalKind::Item => sym::item,
1137            NonterminalKind::Block => sym::block,
1138            NonterminalKind::Stmt => sym::stmt,
1139            NonterminalKind::Pat(PatParam { inferred: true } | PatWithOr) => sym::pat,
1140            NonterminalKind::Pat(PatParam { inferred: false }) => sym::pat_param,
1141            NonterminalKind::Expr(Expr2021 { inferred: true } | Expr) => sym::expr,
1142            NonterminalKind::Expr(Expr2021 { inferred: false }) => sym::expr_2021,
1143            NonterminalKind::Ty => sym::ty,
1144            NonterminalKind::Ident => sym::ident,
1145            NonterminalKind::Lifetime => sym::lifetime,
1146            NonterminalKind::Literal => sym::literal,
1147            NonterminalKind::Meta => sym::meta,
1148            NonterminalKind::Path => sym::path,
1149            NonterminalKind::Vis => sym::vis,
1150            NonterminalKind::TT => sym::tt,
1151        }
1152    }
1153}
1154
1155impl fmt::Display for NonterminalKind {
1156    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1157        write!(f, "{}", self.symbol())
1158    }
1159}
1160
1161impl Nonterminal {
1162    pub fn use_span(&self) -> Span {
1163        match self {
1164            NtItem(item) => item.span,
1165            NtBlock(block) => block.span,
1166            NtStmt(stmt) => stmt.span,
1167            NtPat(pat) => pat.span,
1168            NtExpr(expr) | NtLiteral(expr) => expr.span,
1169            NtTy(ty) => ty.span,
1170            NtMeta(attr_item) => attr_item.span(),
1171            NtPath(path) => path.span,
1172            NtVis(vis) => vis.span,
1173        }
1174    }
1175
1176    pub fn descr(&self) -> &'static str {
1177        match self {
1178            NtItem(..) => "item",
1179            NtBlock(..) => "block",
1180            NtStmt(..) => "statement",
1181            NtPat(..) => "pattern",
1182            NtExpr(..) => "expression",
1183            NtLiteral(..) => "literal",
1184            NtTy(..) => "type",
1185            NtMeta(..) => "attribute",
1186            NtPath(..) => "path",
1187            NtVis(..) => "visibility",
1188        }
1189    }
1190}
1191
1192impl PartialEq for Nonterminal {
1193    fn eq(&self, _rhs: &Self) -> bool {
1194        // FIXME: Assume that all nonterminals are not equal, we can't compare them
1195        // correctly based on data from AST. This will prevent them from matching each other
1196        // in macros. The comparison will become possible only when each nonterminal has an
1197        // attached token stream from which it was parsed.
1198        false
1199    }
1200}
1201
1202impl fmt::Debug for Nonterminal {
1203    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1204        match *self {
1205            NtItem(..) => f.pad("NtItem(..)"),
1206            NtBlock(..) => f.pad("NtBlock(..)"),
1207            NtStmt(..) => f.pad("NtStmt(..)"),
1208            NtPat(..) => f.pad("NtPat(..)"),
1209            NtExpr(..) => f.pad("NtExpr(..)"),
1210            NtTy(..) => f.pad("NtTy(..)"),
1211            NtLiteral(..) => f.pad("NtLiteral(..)"),
1212            NtMeta(..) => f.pad("NtMeta(..)"),
1213            NtPath(..) => f.pad("NtPath(..)"),
1214            NtVis(..) => f.pad("NtVis(..)"),
1215        }
1216    }
1217}
1218
1219impl<CTX> HashStable<CTX> for Nonterminal
1220where
1221    CTX: crate::HashStableContext,
1222{
1223    fn hash_stable(&self, _hcx: &mut CTX, _hasher: &mut StableHasher) {
1224        panic!("interpolated tokens should not be present in the HIR")
1225    }
1226}
1227
1228// Some types are used a lot. Make sure they don't unintentionally get bigger.
1229#[cfg(target_pointer_width = "64")]
1230mod size_asserts {
1231    use rustc_data_structures::static_assert_size;
1232
1233    use super::*;
1234    // tidy-alphabetical-start
1235    static_assert_size!(Lit, 12);
1236    static_assert_size!(LitKind, 2);
1237    static_assert_size!(Nonterminal, 16);
1238    static_assert_size!(Token, 24);
1239    static_assert_size!(TokenKind, 16);
1240    // tidy-alphabetical-end
1241}