rustc_ast/
token.rs

1use std::borrow::Cow;
2use std::fmt;
3
4pub use LitKind::*;
5pub use NtExprKind::*;
6pub use NtPatKind::*;
7pub use TokenKind::*;
8use rustc_macros::{Decodable, Encodable, HashStable_Generic};
9use rustc_span::edition::Edition;
10use rustc_span::symbol::IdentPrintMode;
11use rustc_span::{DUMMY_SP, ErrorGuaranteed, Span, kw, sym};
12#[allow(clippy::useless_attribute)] // FIXME: following use of `hidden_glob_reexports` incorrectly triggers `useless_attribute` lint.
13#[allow(hidden_glob_reexports)]
14use rustc_span::{Ident, Symbol};
15
16use crate::ast;
17use crate::util::case::Case;
18
19/// Represents the kind of doc comment it is, ie `///` or `#[doc = ""]`.
20#[derive(Clone, Copy, PartialEq, Eq, Encodable, Decodable, Debug, HashStable_Generic)]
21pub enum DocFragmentKind {
22    /// A sugared doc comment: `///` or `//!` or `/**` or `/*!`.
23    Sugared(CommentKind),
24    /// A "raw" doc comment: `#[doc = ""]`. The `Span` represents the string literal.
25    Raw(Span),
26}
27
28impl DocFragmentKind {
29    pub fn is_sugared(self) -> bool {
30        matches!(self, Self::Sugared(_))
31    }
32
33    /// If it is `Sugared`, it will return its associated `CommentKind`, otherwise it will return
34    /// `CommentKind::Line`.
35    pub fn comment_kind(self) -> CommentKind {
36        match self {
37            Self::Sugared(kind) => kind,
38            Self::Raw(_) => CommentKind::Line,
39        }
40    }
41}
42
43#[derive(Clone, Copy, PartialEq, Eq, Encodable, Decodable, Debug, HashStable_Generic)]
44pub enum CommentKind {
45    Line,
46    Block,
47}
48
49#[derive(Copy, Clone, PartialEq, Debug, Encodable, Decodable, HashStable_Generic)]
50pub enum InvisibleOrigin {
51    // From the expansion of a metavariable in a declarative macro.
52    MetaVar(MetaVarKind),
53
54    // Converted from `proc_macro::Delimiter` in
55    // `proc_macro::Delimiter::to_internal`, i.e. returned by a proc macro.
56    ProcMacro,
57}
58
59impl InvisibleOrigin {
60    // Should the parser skip these invisible delimiters? Ideally this function
61    // will eventually disappear and no invisible delimiters will be skipped.
62    #[inline]
63    pub fn skip(&self) -> bool {
64        match self {
65            InvisibleOrigin::MetaVar(_) => false,
66            InvisibleOrigin::ProcMacro => true,
67        }
68    }
69}
70
71/// Annoyingly similar to `NonterminalKind`, but the slight differences are important.
72#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)]
73pub enum MetaVarKind {
74    Item,
75    Block,
76    Stmt,
77    Pat(NtPatKind),
78    Expr {
79        kind: NtExprKind,
80        // This field is needed for `Token::can_begin_literal_maybe_minus`.
81        can_begin_literal_maybe_minus: bool,
82        // This field is needed for `Token::can_begin_string_literal`.
83        can_begin_string_literal: bool,
84    },
85    Ty {
86        is_path: bool,
87    },
88    Ident,
89    Lifetime,
90    Literal,
91    Meta {
92        /// Will `AttrItem::meta` succeed on this, if reparsed?
93        has_meta_form: bool,
94    },
95    Path,
96    Vis,
97    TT,
98}
99
100impl fmt::Display for MetaVarKind {
101    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102        let sym = match self {
103            MetaVarKind::Item => sym::item,
104            MetaVarKind::Block => sym::block,
105            MetaVarKind::Stmt => sym::stmt,
106            MetaVarKind::Pat(PatParam { inferred: true } | PatWithOr) => sym::pat,
107            MetaVarKind::Pat(PatParam { inferred: false }) => sym::pat_param,
108            MetaVarKind::Expr { kind: Expr2021 { inferred: true } | Expr, .. } => sym::expr,
109            MetaVarKind::Expr { kind: Expr2021 { inferred: false }, .. } => sym::expr_2021,
110            MetaVarKind::Ty { .. } => sym::ty,
111            MetaVarKind::Ident => sym::ident,
112            MetaVarKind::Lifetime => sym::lifetime,
113            MetaVarKind::Literal => sym::literal,
114            MetaVarKind::Meta { .. } => sym::meta,
115            MetaVarKind::Path => sym::path,
116            MetaVarKind::Vis => sym::vis,
117            MetaVarKind::TT => sym::tt,
118        };
119        write!(f, "{sym}")
120    }
121}
122
123/// Describes how a sequence of token trees is delimited.
124/// Cannot use `proc_macro::Delimiter` directly because this
125/// structure should implement some additional traits.
126#[derive(Copy, Clone, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
127pub enum Delimiter {
128    /// `( ... )`
129    Parenthesis,
130    /// `{ ... }`
131    Brace,
132    /// `[ ... ]`
133    Bracket,
134    /// `∅ ... ∅`
135    /// An invisible delimiter, that may, for example, appear around tokens coming from a
136    /// "macro variable" `$var`. It is important to preserve operator priorities in cases like
137    /// `$var * 3` where `$var` is `1 + 2`.
138    /// Invisible delimiters might not survive roundtrip of a token stream through a string.
139    Invisible(InvisibleOrigin),
140}
141
142impl Delimiter {
143    // Should the parser skip these delimiters? Only happens for certain kinds
144    // of invisible delimiters. Ideally this function will eventually disappear
145    // and no invisible delimiters will be skipped.
146    #[inline]
147    pub fn skip(&self) -> bool {
148        match self {
149            Delimiter::Parenthesis | Delimiter::Bracket | Delimiter::Brace => false,
150            Delimiter::Invisible(origin) => origin.skip(),
151        }
152    }
153
154    // This exists because `InvisibleOrigin`s should not be compared. It is only used for
155    // assertions.
156    pub fn eq_ignoring_invisible_origin(&self, other: &Delimiter) -> bool {
157        match (self, other) {
158            (Delimiter::Parenthesis, Delimiter::Parenthesis) => true,
159            (Delimiter::Brace, Delimiter::Brace) => true,
160            (Delimiter::Bracket, Delimiter::Bracket) => true,
161            (Delimiter::Invisible(_), Delimiter::Invisible(_)) => true,
162            _ => false,
163        }
164    }
165
166    pub fn as_open_token_kind(&self) -> TokenKind {
167        match *self {
168            Delimiter::Parenthesis => OpenParen,
169            Delimiter::Brace => OpenBrace,
170            Delimiter::Bracket => OpenBracket,
171            Delimiter::Invisible(origin) => OpenInvisible(origin),
172        }
173    }
174
175    pub fn as_close_token_kind(&self) -> TokenKind {
176        match *self {
177            Delimiter::Parenthesis => CloseParen,
178            Delimiter::Brace => CloseBrace,
179            Delimiter::Bracket => CloseBracket,
180            Delimiter::Invisible(origin) => CloseInvisible(origin),
181        }
182    }
183}
184
185// Note that the suffix is *not* considered when deciding the `LitKind` in this
186// type. This means that float literals like `1f32` are classified by this type
187// as `Int`. Only upon conversion to `ast::LitKind` will such a literal be
188// given the `Float` kind.
189#[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
190pub enum LitKind {
191    Bool, // AST only, must never appear in a `Token`
192    Byte,
193    Char,
194    Integer, // e.g. `1`, `1u8`, `1f32`
195    Float,   // e.g. `1.`, `1.0`, `1e3f32`
196    Str,
197    StrRaw(u8), // raw string delimited by `n` hash symbols
198    ByteStr,
199    ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols
200    CStr,
201    CStrRaw(u8),
202    Err(ErrorGuaranteed),
203}
204
205/// A literal token.
206#[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
207pub struct Lit {
208    pub kind: LitKind,
209    pub symbol: Symbol,
210    pub suffix: Option<Symbol>,
211}
212
213impl Lit {
214    pub fn new(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> Lit {
215        Lit { kind, symbol, suffix }
216    }
217
218    /// Returns `true` if this is semantically a float literal. This includes
219    /// ones like `1f32` that have an `Integer` kind but a float suffix.
220    pub fn is_semantic_float(&self) -> bool {
221        match self.kind {
222            LitKind::Float => true,
223            LitKind::Integer => match self.suffix {
224                Some(sym) => sym == sym::f32 || sym == sym::f64,
225                None => false,
226            },
227            _ => false,
228        }
229    }
230
231    /// Keep this in sync with `Token::can_begin_literal_maybe_minus` and
232    /// `Parser::eat_token_lit` (excluding unary negation).
233    pub fn from_token(token: &Token) -> Option<Lit> {
234        match token.uninterpolate().kind {
235            Ident(name, IdentIsRaw::No) if name.is_bool_lit() => Some(Lit::new(Bool, name, None)),
236            Literal(token_lit) => Some(token_lit),
237            OpenInvisible(InvisibleOrigin::MetaVar(
238                MetaVarKind::Literal | MetaVarKind::Expr { .. },
239            )) => {
240                // Unreachable with the current test suite.
241                panic!("from_token metavar");
242            }
243            _ => None,
244        }
245    }
246}
247
248impl fmt::Display for Lit {
249    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
250        let Lit { kind, symbol, suffix } = *self;
251        match kind {
252            Byte => write!(f, "b'{symbol}'")?,
253            Char => write!(f, "'{symbol}'")?,
254            Str => write!(f, "\"{symbol}\"")?,
255            StrRaw(n) => write!(
256                f,
257                "r{delim}\"{string}\"{delim}",
258                delim = "#".repeat(n as usize),
259                string = symbol
260            )?,
261            ByteStr => write!(f, "b\"{symbol}\"")?,
262            ByteStrRaw(n) => write!(
263                f,
264                "br{delim}\"{string}\"{delim}",
265                delim = "#".repeat(n as usize),
266                string = symbol
267            )?,
268            CStr => write!(f, "c\"{symbol}\"")?,
269            CStrRaw(n) => {
270                write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))?
271            }
272            Integer | Float | Bool | Err(_) => write!(f, "{symbol}")?,
273        }
274
275        if let Some(suffix) = suffix {
276            write!(f, "{suffix}")?;
277        }
278
279        Ok(())
280    }
281}
282
283impl LitKind {
284    /// An English article for the literal token kind.
285    pub fn article(self) -> &'static str {
286        match self {
287            Integer | Err(_) => "an",
288            _ => "a",
289        }
290    }
291
292    pub fn descr(self) -> &'static str {
293        match self {
294            Bool => "boolean",
295            Byte => "byte",
296            Char => "char",
297            Integer => "integer",
298            Float => "float",
299            Str | StrRaw(..) => "string",
300            ByteStr | ByteStrRaw(..) => "byte string",
301            CStr | CStrRaw(..) => "C string",
302            Err(_) => "error",
303        }
304    }
305
306    pub(crate) fn may_have_suffix(self) -> bool {
307        matches!(self, Integer | Float | Err(_))
308    }
309}
310
311pub fn ident_can_begin_expr(name: Symbol, span: Span, is_raw: IdentIsRaw) -> bool {
312    let ident_token = Token::new(Ident(name, is_raw), span);
313
314    !ident_token.is_reserved_ident()
315        || ident_token.is_path_segment_keyword()
316        || [
317            kw::Async,
318            kw::Do,
319            kw::Box,
320            kw::Break,
321            kw::Const,
322            kw::Continue,
323            kw::False,
324            kw::For,
325            kw::Gen,
326            kw::If,
327            kw::Let,
328            kw::Loop,
329            kw::Match,
330            kw::Move,
331            kw::Return,
332            kw::True,
333            kw::Try,
334            kw::Unsafe,
335            kw::While,
336            kw::Yield,
337            kw::Safe,
338            kw::Static,
339        ]
340        .contains(&name)
341}
342
343fn ident_can_begin_type(name: Symbol, span: Span, is_raw: IdentIsRaw) -> bool {
344    let ident_token = Token::new(Ident(name, is_raw), span);
345
346    !ident_token.is_reserved_ident()
347        || ident_token.is_path_segment_keyword()
348        || [kw::Underscore, kw::For, kw::Impl, kw::Fn, kw::Unsafe, kw::Extern, kw::Typeof, kw::Dyn]
349            .contains(&name)
350}
351
352#[derive(PartialEq, Encodable, Decodable, Debug, Copy, Clone, HashStable_Generic)]
353pub enum IdentIsRaw {
354    No,
355    Yes,
356}
357
358impl IdentIsRaw {
359    pub fn to_print_mode_ident(self) -> IdentPrintMode {
360        match self {
361            IdentIsRaw::No => IdentPrintMode::Normal,
362            IdentIsRaw::Yes => IdentPrintMode::RawIdent,
363        }
364    }
365    pub fn to_print_mode_lifetime(self) -> IdentPrintMode {
366        match self {
367            IdentIsRaw::No => IdentPrintMode::Normal,
368            IdentIsRaw::Yes => IdentPrintMode::RawLifetime,
369        }
370    }
371}
372
373impl From<bool> for IdentIsRaw {
374    fn from(b: bool) -> Self {
375        if b { Self::Yes } else { Self::No }
376    }
377}
378
379#[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
380pub enum TokenKind {
381    /* Expression-operator symbols. */
382    /// `=`
383    Eq,
384    /// `<`
385    Lt,
386    /// `<=`
387    Le,
388    /// `==`
389    EqEq,
390    /// `!=`
391    Ne,
392    /// `>=`
393    Ge,
394    /// `>`
395    Gt,
396    /// `&&`
397    AndAnd,
398    /// `||`
399    OrOr,
400    /// `!`
401    Bang,
402    /// `~`
403    Tilde,
404    // `+`
405    Plus,
406    // `-`
407    Minus,
408    // `*`
409    Star,
410    // `/`
411    Slash,
412    // `%`
413    Percent,
414    // `^`
415    Caret,
416    // `&`
417    And,
418    // `|`
419    Or,
420    // `<<`
421    Shl,
422    // `>>`
423    Shr,
424    // `+=`
425    PlusEq,
426    // `-=`
427    MinusEq,
428    // `*=`
429    StarEq,
430    // `/=`
431    SlashEq,
432    // `%=`
433    PercentEq,
434    // `^=`
435    CaretEq,
436    // `&=`
437    AndEq,
438    // `|=`
439    OrEq,
440    // `<<=`
441    ShlEq,
442    // `>>=`
443    ShrEq,
444
445    /* Structural symbols */
446    /// `@`
447    At,
448    /// `.`
449    Dot,
450    /// `..`
451    DotDot,
452    /// `...`
453    DotDotDot,
454    /// `..=`
455    DotDotEq,
456    /// `,`
457    Comma,
458    /// `;`
459    Semi,
460    /// `:`
461    Colon,
462    /// `::`
463    PathSep,
464    /// `->`
465    RArrow,
466    /// `<-`
467    LArrow,
468    /// `=>`
469    FatArrow,
470    /// `#`
471    Pound,
472    /// `$`
473    Dollar,
474    /// `?`
475    Question,
476    /// Used by proc macros for representing lifetimes, not generated by lexer right now.
477    SingleQuote,
478    /// `(`
479    OpenParen,
480    /// `)`
481    CloseParen,
482    /// `{`
483    OpenBrace,
484    /// `}`
485    CloseBrace,
486    /// `[`
487    OpenBracket,
488    /// `]`
489    CloseBracket,
490    /// Invisible opening delimiter, produced by a macro.
491    OpenInvisible(InvisibleOrigin),
492    /// Invisible closing delimiter, produced by a macro.
493    CloseInvisible(InvisibleOrigin),
494
495    /* Literals */
496    Literal(Lit),
497
498    /// Identifier token.
499    /// Do not forget about `NtIdent` when you want to match on identifiers.
500    /// It's recommended to use `Token::{ident,uninterpolate}` and
501    /// `Parser::token_uninterpolated_span` to treat regular and interpolated
502    /// identifiers in the same way.
503    Ident(Symbol, IdentIsRaw),
504    /// This identifier (and its span) is the identifier passed to the
505    /// declarative macro. The span in the surrounding `Token` is the span of
506    /// the `ident` metavariable in the macro's RHS.
507    NtIdent(Ident, IdentIsRaw),
508
509    /// Lifetime identifier token.
510    /// Do not forget about `NtLifetime` when you want to match on lifetime identifiers.
511    /// It's recommended to use `Token::{ident,uninterpolate}` and
512    /// `Parser::token_uninterpolated_span` to treat regular and interpolated
513    /// identifiers in the same way.
514    Lifetime(Symbol, IdentIsRaw),
515    /// This identifier (and its span) is the lifetime passed to the
516    /// declarative macro. The span in the surrounding `Token` is the span of
517    /// the `lifetime` metavariable in the macro's RHS.
518    NtLifetime(Ident, IdentIsRaw),
519
520    /// A doc comment token.
521    /// `Symbol` is the doc comment's data excluding its "quotes" (`///`, `/**`, etc)
522    /// similarly to symbols in string literal tokens.
523    DocComment(CommentKind, ast::AttrStyle, Symbol),
524
525    /// End Of File
526    Eof,
527}
528
529#[derive(Clone, Copy, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
530pub struct Token {
531    pub kind: TokenKind,
532    pub span: Span,
533}
534
535impl TokenKind {
536    pub fn lit(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> TokenKind {
537        Literal(Lit::new(kind, symbol, suffix))
538    }
539
540    /// An approximation to proc-macro-style single-character operators used by
541    /// rustc parser. If the operator token can be broken into two tokens, the
542    /// first of which has `n` (1 or 2) chars, then this function performs that
543    /// operation, otherwise it returns `None`.
544    pub fn break_two_token_op(&self, n: u32) -> Option<(TokenKind, TokenKind)> {
545        assert!(n == 1 || n == 2);
546        Some(match (self, n) {
547            (Le, 1) => (Lt, Eq),
548            (EqEq, 1) => (Eq, Eq),
549            (Ne, 1) => (Bang, Eq),
550            (Ge, 1) => (Gt, Eq),
551            (AndAnd, 1) => (And, And),
552            (OrOr, 1) => (Or, Or),
553            (Shl, 1) => (Lt, Lt),
554            (Shr, 1) => (Gt, Gt),
555            (PlusEq, 1) => (Plus, Eq),
556            (MinusEq, 1) => (Minus, Eq),
557            (StarEq, 1) => (Star, Eq),
558            (SlashEq, 1) => (Slash, Eq),
559            (PercentEq, 1) => (Percent, Eq),
560            (CaretEq, 1) => (Caret, Eq),
561            (AndEq, 1) => (And, Eq),
562            (OrEq, 1) => (Or, Eq),
563            (ShlEq, 1) => (Lt, Le),  // `<` + `<=`
564            (ShlEq, 2) => (Shl, Eq), // `<<` + `=`
565            (ShrEq, 1) => (Gt, Ge),  // `>` + `>=`
566            (ShrEq, 2) => (Shr, Eq), // `>>` + `=`
567            (DotDot, 1) => (Dot, Dot),
568            (DotDotDot, 1) => (Dot, DotDot), // `.` + `..`
569            (DotDotDot, 2) => (DotDot, Dot), // `..` + `.`
570            (DotDotEq, 2) => (DotDot, Eq),
571            (PathSep, 1) => (Colon, Colon),
572            (RArrow, 1) => (Minus, Gt),
573            (LArrow, 1) => (Lt, Minus),
574            (FatArrow, 1) => (Eq, Gt),
575            _ => return None,
576        })
577    }
578
579    /// Returns tokens that are likely to be typed accidentally instead of the current token.
580    /// Enables better error recovery when the wrong token is found.
581    pub fn similar_tokens(&self) -> &[TokenKind] {
582        match self {
583            Comma => &[Dot, Lt, Semi],
584            Semi => &[Colon, Comma],
585            Colon => &[Semi],
586            FatArrow => &[Eq, RArrow, Ge, Gt],
587            _ => &[],
588        }
589    }
590
591    pub fn should_end_const_arg(&self) -> bool {
592        matches!(self, Gt | Ge | Shr | ShrEq)
593    }
594
595    pub fn is_delim(&self) -> bool {
596        self.open_delim().is_some() || self.close_delim().is_some()
597    }
598
599    pub fn open_delim(&self) -> Option<Delimiter> {
600        match *self {
601            OpenParen => Some(Delimiter::Parenthesis),
602            OpenBrace => Some(Delimiter::Brace),
603            OpenBracket => Some(Delimiter::Bracket),
604            OpenInvisible(origin) => Some(Delimiter::Invisible(origin)),
605            _ => None,
606        }
607    }
608
609    pub fn close_delim(&self) -> Option<Delimiter> {
610        match *self {
611            CloseParen => Some(Delimiter::Parenthesis),
612            CloseBrace => Some(Delimiter::Brace),
613            CloseBracket => Some(Delimiter::Bracket),
614            CloseInvisible(origin) => Some(Delimiter::Invisible(origin)),
615            _ => None,
616        }
617    }
618
619    pub fn is_close_delim_or_eof(&self) -> bool {
620        match self {
621            CloseParen | CloseBrace | CloseBracket | CloseInvisible(_) | Eof => true,
622            _ => false,
623        }
624    }
625}
626
627impl Token {
628    pub fn new(kind: TokenKind, span: Span) -> Self {
629        Token { kind, span }
630    }
631
632    /// Some token that will be thrown away later.
633    pub fn dummy() -> Self {
634        Token::new(TokenKind::Question, DUMMY_SP)
635    }
636
637    /// Recovers a `Token` from an `Ident`. This creates a raw identifier if necessary.
638    pub fn from_ast_ident(ident: Ident) -> Self {
639        Token::new(Ident(ident.name, ident.is_raw_guess().into()), ident.span)
640    }
641
642    pub fn is_range_separator(&self) -> bool {
643        [DotDot, DotDotDot, DotDotEq].contains(&self.kind)
644    }
645
646    pub fn is_punct(&self) -> bool {
647        match self.kind {
648            Eq | Lt | Le | EqEq | Ne | Ge | Gt | AndAnd | OrOr | Bang | Tilde | Plus | Minus
649            | Star | Slash | Percent | Caret | And | Or | Shl | Shr | PlusEq | MinusEq | StarEq
650            | SlashEq | PercentEq | CaretEq | AndEq | OrEq | ShlEq | ShrEq | At | Dot | DotDot
651            | DotDotDot | DotDotEq | Comma | Semi | Colon | PathSep | RArrow | LArrow
652            | FatArrow | Pound | Dollar | Question | SingleQuote => true,
653
654            OpenParen | CloseParen | OpenBrace | CloseBrace | OpenBracket | CloseBracket
655            | OpenInvisible(_) | CloseInvisible(_) | Literal(..) | DocComment(..) | Ident(..)
656            | NtIdent(..) | Lifetime(..) | NtLifetime(..) | Eof => false,
657        }
658    }
659
660    pub fn is_like_plus(&self) -> bool {
661        matches!(self.kind, Plus | PlusEq)
662    }
663
664    /// Returns `true` if the token can appear at the start of an expression.
665    ///
666    /// **NB**: Take care when modifying this function, since it will change
667    /// the stable set of tokens that are allowed to match an expr nonterminal.
668    pub fn can_begin_expr(&self) -> bool {
669        match self.uninterpolate().kind {
670            Ident(name, is_raw)              =>
671                ident_can_begin_expr(name, self.span, is_raw), // value name or keyword
672            OpenParen                         | // tuple
673            OpenBrace                         | // block
674            OpenBracket                       | // array
675            Literal(..)                       | // literal
676            Bang                              | // operator not
677            Minus                             | // unary minus
678            Star                              | // dereference
679            Or | OrOr                         | // closure
680            And                               | // reference
681            AndAnd                            | // double reference
682            // DotDotDot is no longer supported, but we need some way to display the error
683            DotDot | DotDotDot | DotDotEq     | // range notation
684            Lt | Shl                          | // associated path
685            PathSep                           | // global path
686            Lifetime(..)                      | // labeled loop
687            Pound                             => true, // expression attributes
688            OpenInvisible(InvisibleOrigin::MetaVar(
689                MetaVarKind::Block |
690                MetaVarKind::Expr { .. } |
691                MetaVarKind::Literal |
692                MetaVarKind::Path
693            )) => true,
694            _ => false,
695        }
696    }
697
698    /// Returns `true` if the token can appear at the start of a pattern.
699    ///
700    /// Shamelessly borrowed from `can_begin_expr`, only used for diagnostics right now.
701    pub fn can_begin_pattern(&self, pat_kind: NtPatKind) -> bool {
702        match &self.uninterpolate().kind {
703            // box, ref, mut, and other identifiers (can stricten)
704            Ident(..) | NtIdent(..) |
705            OpenParen |                          // tuple pattern
706            OpenBracket |                        // slice pattern
707            And |                                // reference
708            Minus |                              // negative literal
709            AndAnd |                             // double reference
710            Literal(_) |                         // literal
711            DotDot |                             // range pattern (future compat)
712            DotDotDot |                          // range pattern (future compat)
713            PathSep |                            // path
714            Lt |                                 // path (UFCS constant)
715            Shl => true,                         // path (double UFCS)
716            Or => matches!(pat_kind, PatWithOr), // leading vert `|` or-pattern
717            OpenInvisible(InvisibleOrigin::MetaVar(
718                MetaVarKind::Expr { .. } |
719                MetaVarKind::Literal |
720                MetaVarKind::Meta { .. } |
721                MetaVarKind::Pat(_) |
722                MetaVarKind::Path |
723                MetaVarKind::Ty { .. }
724            )) => true,
725            _ => false,
726        }
727    }
728
729    /// Returns `true` if the token can appear at the start of a type.
730    pub fn can_begin_type(&self) -> bool {
731        match self.uninterpolate().kind {
732            Ident(name, is_raw) =>
733                ident_can_begin_type(name, self.span, is_raw), // type name or keyword
734            OpenParen                         | // tuple
735            OpenBracket                       | // array
736            Bang                              | // never
737            Star                              | // raw pointer
738            And                               | // reference
739            AndAnd                            | // double reference
740            Question                          | // maybe bound in trait object
741            Lifetime(..)                      | // lifetime bound in trait object
742            Lt | Shl                          | // associated path
743            PathSep => true,                    // global path
744            OpenInvisible(InvisibleOrigin::MetaVar(
745                MetaVarKind::Ty { .. } |
746                MetaVarKind::Path
747            )) => true,
748            // For anonymous structs or unions, which only appear in specific positions
749            // (type of struct fields or union fields), we don't consider them as regular types
750            _ => false,
751        }
752    }
753
754    /// Returns `true` if the token can appear at the start of a const param.
755    pub fn can_begin_const_arg(&self) -> bool {
756        match self.kind {
757            OpenBrace | Literal(..) | Minus => true,
758            Ident(name, IdentIsRaw::No) if name.is_bool_lit() => true,
759            OpenInvisible(InvisibleOrigin::MetaVar(
760                MetaVarKind::Expr { .. } | MetaVarKind::Block | MetaVarKind::Literal,
761            )) => true,
762            _ => false,
763        }
764    }
765
766    /// Returns `true` if the token can appear at the start of an item.
767    pub fn can_begin_item(&self) -> bool {
768        match self.kind {
769            Ident(name, _) => [
770                kw::Fn,
771                kw::Use,
772                kw::Struct,
773                kw::Enum,
774                kw::Pub,
775                kw::Trait,
776                kw::Extern,
777                kw::Impl,
778                kw::Unsafe,
779                kw::Const,
780                kw::Safe,
781                kw::Static,
782                kw::Union,
783                kw::Macro,
784                kw::Mod,
785                kw::Type,
786            ]
787            .contains(&name),
788            _ => false,
789        }
790    }
791
792    /// Returns `true` if the token is any literal.
793    pub fn is_lit(&self) -> bool {
794        matches!(self.kind, Literal(..))
795    }
796
797    /// Returns `true` if the token is any literal, a minus (which can prefix a literal,
798    /// for example a '-42', or one of the boolean idents).
799    ///
800    /// In other words, would this token be a valid start of `parse_literal_maybe_minus`?
801    ///
802    /// Keep this in sync with `Lit::from_token` and `Parser::eat_token_lit`
803    /// (excluding unary negation).
804    pub fn can_begin_literal_maybe_minus(&self) -> bool {
805        match self.uninterpolate().kind {
806            Literal(..) | Minus => true,
807            Ident(name, IdentIsRaw::No) if name.is_bool_lit() => true,
808            OpenInvisible(InvisibleOrigin::MetaVar(mv_kind)) => match mv_kind {
809                MetaVarKind::Literal => true,
810                MetaVarKind::Expr { can_begin_literal_maybe_minus, .. } => {
811                    can_begin_literal_maybe_minus
812                }
813                _ => false,
814            },
815            _ => false,
816        }
817    }
818
819    pub fn can_begin_string_literal(&self) -> bool {
820        match self.uninterpolate().kind {
821            Literal(..) => true,
822            OpenInvisible(InvisibleOrigin::MetaVar(mv_kind)) => match mv_kind {
823                MetaVarKind::Literal => true,
824                MetaVarKind::Expr { can_begin_string_literal, .. } => can_begin_string_literal,
825                _ => false,
826            },
827            _ => false,
828        }
829    }
830
831    /// A convenience function for matching on identifiers during parsing.
832    /// Turns interpolated identifier (`$i: ident`) or lifetime (`$l: lifetime`) token
833    /// into the regular identifier or lifetime token it refers to,
834    /// otherwise returns the original token.
835    pub fn uninterpolate(&self) -> Cow<'_, Token> {
836        match self.kind {
837            NtIdent(ident, is_raw) => Cow::Owned(Token::new(Ident(ident.name, is_raw), ident.span)),
838            NtLifetime(ident, is_raw) => {
839                Cow::Owned(Token::new(Lifetime(ident.name, is_raw), ident.span))
840            }
841            _ => Cow::Borrowed(self),
842        }
843    }
844
845    /// Returns an identifier if this token is an identifier.
846    #[inline]
847    pub fn ident(&self) -> Option<(Ident, IdentIsRaw)> {
848        // We avoid using `Token::uninterpolate` here because it's slow.
849        match self.kind {
850            Ident(name, is_raw) => Some((Ident::new(name, self.span), is_raw)),
851            NtIdent(ident, is_raw) => Some((ident, is_raw)),
852            _ => None,
853        }
854    }
855
856    /// Returns a lifetime identifier if this token is a lifetime.
857    #[inline]
858    pub fn lifetime(&self) -> Option<(Ident, IdentIsRaw)> {
859        // We avoid using `Token::uninterpolate` here because it's slow.
860        match self.kind {
861            Lifetime(name, is_raw) => Some((Ident::new(name, self.span), is_raw)),
862            NtLifetime(ident, is_raw) => Some((ident, is_raw)),
863            _ => None,
864        }
865    }
866
867    /// Returns `true` if the token is an identifier.
868    pub fn is_ident(&self) -> bool {
869        self.ident().is_some()
870    }
871
872    /// Returns `true` if the token is a lifetime.
873    pub fn is_lifetime(&self) -> bool {
874        self.lifetime().is_some()
875    }
876
877    /// Returns `true` if the token is an identifier whose name is the given
878    /// string slice.
879    pub fn is_ident_named(&self, name: Symbol) -> bool {
880        self.ident().is_some_and(|(ident, _)| ident.name == name)
881    }
882
883    /// Is this a pre-parsed expression dropped into the token stream
884    /// (which happens while parsing the result of macro expansion)?
885    pub fn is_metavar_expr(&self) -> bool {
886        matches!(
887            self.is_metavar_seq(),
888            Some(
889                MetaVarKind::Expr { .. }
890                    | MetaVarKind::Literal
891                    | MetaVarKind::Path
892                    | MetaVarKind::Block
893            )
894        )
895    }
896
897    /// Are we at a block from a metavar (`$b:block`)?
898    pub fn is_metavar_block(&self) -> bool {
899        matches!(self.is_metavar_seq(), Some(MetaVarKind::Block))
900    }
901
902    /// Returns `true` if the token is either the `mut` or `const` keyword.
903    pub fn is_mutability(&self) -> bool {
904        self.is_keyword(kw::Mut) || self.is_keyword(kw::Const)
905    }
906
907    pub fn is_qpath_start(&self) -> bool {
908        matches!(self.kind, Lt | Shl)
909    }
910
911    pub fn is_path_start(&self) -> bool {
912        self.kind == PathSep
913            || self.is_qpath_start()
914            || matches!(self.is_metavar_seq(), Some(MetaVarKind::Path))
915            || self.is_path_segment_keyword()
916            || self.is_non_reserved_ident()
917    }
918
919    /// Returns `true` if the token is a given keyword, `kw`.
920    pub fn is_keyword(&self, kw: Symbol) -> bool {
921        self.is_non_raw_ident_where(|id| id.name == kw)
922    }
923
924    /// Returns `true` if the token is a given keyword, `kw` or if `case` is `Insensitive` and this
925    /// token is an identifier equal to `kw` ignoring the case.
926    pub fn is_keyword_case(&self, kw: Symbol, case: Case) -> bool {
927        self.is_keyword(kw)
928            || (case == Case::Insensitive
929                && self.is_non_raw_ident_where(|id| {
930                    // Do an ASCII case-insensitive match, because all keywords are ASCII.
931                    id.name.as_str().eq_ignore_ascii_case(kw.as_str())
932                }))
933    }
934
935    pub fn is_path_segment_keyword(&self) -> bool {
936        self.is_non_raw_ident_where(Ident::is_path_segment_keyword)
937    }
938
939    /// Returns true for reserved identifiers used internally for elided lifetimes,
940    /// unnamed method parameters, crate root module, error recovery etc.
941    pub fn is_special_ident(&self) -> bool {
942        self.is_non_raw_ident_where(Ident::is_special)
943    }
944
945    /// Returns `true` if the token is a keyword used in the language.
946    pub fn is_used_keyword(&self) -> bool {
947        self.is_non_raw_ident_where(Ident::is_used_keyword)
948    }
949
950    /// Returns `true` if the token is a keyword reserved for possible future use.
951    pub fn is_unused_keyword(&self) -> bool {
952        self.is_non_raw_ident_where(Ident::is_unused_keyword)
953    }
954
955    /// Returns `true` if the token is either a special identifier or a keyword.
956    pub fn is_reserved_ident(&self) -> bool {
957        self.is_non_raw_ident_where(Ident::is_reserved)
958    }
959
960    pub fn is_non_reserved_ident(&self) -> bool {
961        self.ident().is_some_and(|(id, raw)| raw == IdentIsRaw::Yes || !Ident::is_reserved(id))
962    }
963
964    /// Returns `true` if the token is the identifier `true` or `false`.
965    pub fn is_bool_lit(&self) -> bool {
966        self.is_non_raw_ident_where(|id| id.name.is_bool_lit())
967    }
968
969    pub fn is_numeric_lit(&self) -> bool {
970        matches!(
971            self.kind,
972            Literal(Lit { kind: LitKind::Integer, .. }) | Literal(Lit { kind: LitKind::Float, .. })
973        )
974    }
975
976    /// Returns `true` if the token is the integer literal.
977    pub fn is_integer_lit(&self) -> bool {
978        matches!(self.kind, Literal(Lit { kind: LitKind::Integer, .. }))
979    }
980
981    /// Returns `true` if the token is a non-raw identifier for which `pred` holds.
982    pub fn is_non_raw_ident_where(&self, pred: impl FnOnce(Ident) -> bool) -> bool {
983        match self.ident() {
984            Some((id, IdentIsRaw::No)) => pred(id),
985            _ => false,
986        }
987    }
988
989    /// Is this an invisible open delimiter at the start of a token sequence
990    /// from an expanded metavar?
991    pub fn is_metavar_seq(&self) -> Option<MetaVarKind> {
992        match self.kind {
993            OpenInvisible(InvisibleOrigin::MetaVar(kind)) => Some(kind),
994            _ => None,
995        }
996    }
997
998    pub fn glue(&self, joint: &Token) -> Option<Token> {
999        let kind = match (&self.kind, &joint.kind) {
1000            (Eq, Eq) => EqEq,
1001            (Eq, Gt) => FatArrow,
1002            (Eq, _) => return None,
1003
1004            (Lt, Eq) => Le,
1005            (Lt, Lt) => Shl,
1006            (Lt, Le) => ShlEq,
1007            (Lt, Minus) => LArrow,
1008            (Lt, _) => return None,
1009
1010            (Gt, Eq) => Ge,
1011            (Gt, Gt) => Shr,
1012            (Gt, Ge) => ShrEq,
1013            (Gt, _) => return None,
1014
1015            (Bang, Eq) => Ne,
1016            (Bang, _) => return None,
1017
1018            (Plus, Eq) => PlusEq,
1019            (Plus, _) => return None,
1020
1021            (Minus, Eq) => MinusEq,
1022            (Minus, Gt) => RArrow,
1023            (Minus, _) => return None,
1024
1025            (Star, Eq) => StarEq,
1026            (Star, _) => return None,
1027
1028            (Slash, Eq) => SlashEq,
1029            (Slash, _) => return None,
1030
1031            (Percent, Eq) => PercentEq,
1032            (Percent, _) => return None,
1033
1034            (Caret, Eq) => CaretEq,
1035            (Caret, _) => return None,
1036
1037            (And, Eq) => AndEq,
1038            (And, And) => AndAnd,
1039            (And, _) => return None,
1040
1041            (Or, Eq) => OrEq,
1042            (Or, Or) => OrOr,
1043            (Or, _) => return None,
1044
1045            (Shl, Eq) => ShlEq,
1046            (Shl, _) => return None,
1047
1048            (Shr, Eq) => ShrEq,
1049            (Shr, _) => return None,
1050
1051            (Dot, Dot) => DotDot,
1052            (Dot, DotDot) => DotDotDot,
1053            (Dot, _) => return None,
1054
1055            (DotDot, Dot) => DotDotDot,
1056            (DotDot, Eq) => DotDotEq,
1057            (DotDot, _) => return None,
1058
1059            (Colon, Colon) => PathSep,
1060            (Colon, _) => return None,
1061
1062            (SingleQuote, Ident(name, is_raw)) => {
1063                Lifetime(Symbol::intern(&format!("'{name}")), *is_raw)
1064            }
1065            (SingleQuote, _) => return None,
1066
1067            (
1068                Le | EqEq | Ne | Ge | AndAnd | OrOr | Tilde | PlusEq | MinusEq | StarEq | SlashEq
1069                | PercentEq | CaretEq | AndEq | OrEq | ShlEq | ShrEq | At | DotDotDot | DotDotEq
1070                | Comma | Semi | PathSep | RArrow | LArrow | FatArrow | Pound | Dollar | Question
1071                | OpenParen | CloseParen | OpenBrace | CloseBrace | OpenBracket | CloseBracket
1072                | OpenInvisible(_) | CloseInvisible(_) | Literal(..) | Ident(..) | NtIdent(..)
1073                | Lifetime(..) | NtLifetime(..) | DocComment(..) | Eof,
1074                _,
1075            ) => {
1076                return None;
1077            }
1078        };
1079
1080        Some(Token::new(kind, self.span.to(joint.span)))
1081    }
1082}
1083
1084impl PartialEq<TokenKind> for Token {
1085    #[inline]
1086    fn eq(&self, rhs: &TokenKind) -> bool {
1087        self.kind == *rhs
1088    }
1089}
1090
/// Which flavour of pattern fragment specifier (`pat` / `pat_param`) a
/// `NonterminalKind::Pat` stands for.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)]
pub enum NtPatKind {
    // Matches or-patterns. Was written using `pat` in edition 2021 or later.
    PatWithOr,
    // Doesn't match or-patterns.
    // - `inferred`: was written using `pat` in edition 2015 or 2018.
    // - `!inferred`: was written using `pat_param`.
    PatParam { inferred: bool },
}
1100
/// Which flavour of expression fragment specifier (`expr` / `expr_2021`) a
/// `NonterminalKind::Expr` stands for.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)]
pub enum NtExprKind {
    // Matches expressions using the edition 2024 (and later) rules. Was written
    // using `expr` in edition 2024 or later.
    Expr,
    // Matches expressions using the pre-edition 2024 rules.
    // - `inferred`: was written using `expr` in edition 2021 or earlier.
    // - `!inferred`: was written using `expr_2021`.
    Expr2021 { inferred: bool },
}
1111
/// A macro nonterminal, known in documentation as a fragment specifier.
///
/// `NonterminalKind::from_symbol` maps a specifier name (e.g. `item`, `tt`)
/// to the matching variant. `Pat` and `Expr` additionally record which
/// edition-dependent flavour of the specifier was meant.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)]
pub enum NonterminalKind {
    Item,
    Block,
    Stmt,
    Pat(NtPatKind),
    Expr(NtExprKind),
    Ty,
    Ident,
    Lifetime,
    Literal,
    Meta,
    Path,
    Vis,
    TT,
}
1129
1130impl NonterminalKind {
1131    /// The `edition` closure is used to get the edition for the given symbol. Doing
1132    /// `span.edition()` is expensive, so we do it lazily.
1133    pub fn from_symbol(
1134        symbol: Symbol,
1135        edition: impl FnOnce() -> Edition,
1136    ) -> Option<NonterminalKind> {
1137        Some(match symbol {
1138            sym::item => NonterminalKind::Item,
1139            sym::block => NonterminalKind::Block,
1140            sym::stmt => NonterminalKind::Stmt,
1141            sym::pat => {
1142                if edition().at_least_rust_2021() {
1143                    NonterminalKind::Pat(PatWithOr)
1144                } else {
1145                    NonterminalKind::Pat(PatParam { inferred: true })
1146                }
1147            }
1148            sym::pat_param => NonterminalKind::Pat(PatParam { inferred: false }),
1149            sym::expr => {
1150                if edition().at_least_rust_2024() {
1151                    NonterminalKind::Expr(Expr)
1152                } else {
1153                    NonterminalKind::Expr(Expr2021 { inferred: true })
1154                }
1155            }
1156            sym::expr_2021 => NonterminalKind::Expr(Expr2021 { inferred: false }),
1157            sym::ty => NonterminalKind::Ty,
1158            sym::ident => NonterminalKind::Ident,
1159            sym::lifetime => NonterminalKind::Lifetime,
1160            sym::literal => NonterminalKind::Literal,
1161            sym::meta => NonterminalKind::Meta,
1162            sym::path => NonterminalKind::Path,
1163            sym::vis => NonterminalKind::Vis,
1164            sym::tt => NonterminalKind::TT,
1165            _ => return None,
1166        })
1167    }
1168
1169    fn symbol(self) -> Symbol {
1170        match self {
1171            NonterminalKind::Item => sym::item,
1172            NonterminalKind::Block => sym::block,
1173            NonterminalKind::Stmt => sym::stmt,
1174            NonterminalKind::Pat(PatParam { inferred: true } | PatWithOr) => sym::pat,
1175            NonterminalKind::Pat(PatParam { inferred: false }) => sym::pat_param,
1176            NonterminalKind::Expr(Expr2021 { inferred: true } | Expr) => sym::expr,
1177            NonterminalKind::Expr(Expr2021 { inferred: false }) => sym::expr_2021,
1178            NonterminalKind::Ty => sym::ty,
1179            NonterminalKind::Ident => sym::ident,
1180            NonterminalKind::Lifetime => sym::lifetime,
1181            NonterminalKind::Literal => sym::literal,
1182            NonterminalKind::Meta => sym::meta,
1183            NonterminalKind::Path => sym::path,
1184            NonterminalKind::Vis => sym::vis,
1185            NonterminalKind::TT => sym::tt,
1186        }
1187    }
1188}
1189
1190impl fmt::Display for NonterminalKind {
1191    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1192        write!(f, "{}", self.symbol())
1193    }
1194}
1195
// Some types are used a lot. Make sure they don't unintentionally get bigger.
// The expected sizes below are only checked when compiling for a target with
// 64-bit pointers (see the `cfg` on the module).
#[cfg(target_pointer_width = "64")]
mod size_asserts {
    use rustc_data_structures::static_assert_size;

    use super::*;
    // tidy-alphabetical-start
    static_assert_size!(Lit, 12);
    static_assert_size!(LitKind, 2);
    static_assert_size!(Token, 24);
    static_assert_size!(TokenKind, 16);
    // tidy-alphabetical-end
}