rustc_parse/parser/
token_type.rs

1use rustc_ast::token::TokenKind;
2use rustc_span::symbol::{Symbol, kw, sym};
3
/// Used in "expected"/"expected one of" error messages. Tokens are added here
/// as necessary. Tokens with values (e.g. literals, identifiers) are
/// represented by a single variant (e.g. `Literal`, `Ident`).
///
/// It's an awkward representation, but it's important for performance. It's a
/// C-style parameterless enum so that `TokenTypeSet` can be a bitset. This is
/// important because `Parser::expected_token_types` is very hot. `TokenType`
/// used to have variants with parameters (e.g. all the keywords were in a
/// single `Keyword` variant with a `Symbol` parameter) and
/// `Parser::expected_token_types` was a `Vec<TokenType>` which was much slower
/// to manipulate.
///
/// We really want to keep the number of variants to 128 or fewer, so that
/// `TokenTypeSet` can be implemented with a `u128`.
///
/// Each variant's discriminant (`variant as u32`) is used as its bit index in
/// `TokenTypeSet`, so declaration order also determines the order in which
/// `TokenTypeSetIter` yields token types.
///
/// When adding a variant, also add it to `TokenType::from_u32` and, for
/// keywords and keyword-like symbols, to `TokenType::is_keyword` (and to the
/// `exp!` macro if callers need an `ExpTokenPair`/`ExpKeywordPair` for it).
/// None of those tables is checked against this enum by the compiler: a
/// variant missing from `from_u32` makes it panic with "unhandled value", and
/// a keyword variant missing from `is_keyword` makes `TokenType::to_string`
/// panic.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum TokenType {
    // Expression-operator symbols
    Eq,
    Lt,
    Le,
    EqEq,
    Gt,
    AndAnd,
    OrOr,
    Bang,
    Tilde,

    // BinOps
    Plus,
    Minus,
    Star,
    And,
    Or,

    // Structural symbols
    At,
    Dot,
    DotDot,
    DotDotDot,
    DotDotEq,
    Comma,
    Semi,
    Colon,
    PathSep,
    RArrow,
    FatArrow,
    Pound,
    Question,
    OpenParen,
    CloseParen,
    OpenBrace,
    CloseBrace,
    OpenBracket,
    CloseBracket,
    Eof,

    // Token types with some details elided.
    /// Any operator.
    Operator,
    /// Any identifier token.
    Ident,
    /// Any lifetime token.
    Lifetime,
    /// Any token that can start a path.
    Path,
    /// Any token that can start a type.
    Type,
    /// Any token that can start a const expression.
    Const,

    // Keywords
    // tidy-alphabetical-start
    KwAs,
    KwAsync,
    KwAuto,
    KwAwait,
    KwBecome,
    KwBox,
    KwBreak,
    KwCatch,
    KwConst,
    KwContinue,
    KwContractEnsures,
    KwContractRequires,
    KwCrate,
    KwDefault,
    KwDyn,
    KwElse,
    KwEnum,
    KwExtern,
    KwFn,
    KwFor,
    KwGen,
    KwIf,
    KwImpl,
    KwIn,
    KwLet,
    KwLoop,
    KwMacro,
    KwMacroRules,
    KwMatch,
    KwMod,
    KwMove,
    KwMut,
    KwPub,
    KwRaw,
    KwRef,
    KwReturn,
    KwReuse,
    KwSafe,
    KwSelfUpper,
    KwStatic,
    KwStruct,
    KwSuper,
    KwTrait,
    KwTry,
    KwType,
    KwUnderscore,
    KwUnsafe,
    KwUse,
    KwWhere,
    KwWhile,
    KwYield,
    // tidy-alphabetical-end

    // Keyword-like symbols.
    // tidy-alphabetical-start
    SymAttSyntax,
    SymBikeshed,
    SymClobberAbi,
    SymInlateout,
    SymInout,
    SymIs,
    SymLabel,
    SymLateout,
    SymMayUnwind,
    SymNomem,
    SymNoreturn,
    SymNostack,
    SymNull,
    SymOptions,
    SymOut,
    SymPin,
    SymPreservesFlags,
    SymPure,
    SymReadonly,
    SymSym,
    // tidy-alphabetical-end
}
153
// Expands to a `match` that turns a raw `u32` back into one of the listed
// `TokenType` variants, so that `TokenType::from_u32` does not need a
// hand-written arm per variant.
macro_rules! from_u32_match {
    ($raw:ident; $($variant:ident,)+) => {
        match $raw {
            $(
                // Writing `0 => TokenType::Eq` would be more direct, but
                // comparing against the cast inside a guard means no arm
                // has to spell out its variant's integer value.
                n if TokenType::$variant as u32 == n => TokenType::$variant,
            )+
            // Reached for any value that is not the discriminant of a
            // variant passed to this macro.
            other => panic!("unhandled value: {}", other),
        }
    };
}
168
169impl TokenType {
170    fn from_u32(val: u32) -> TokenType {
171        let token_type = from_u32_match! { val;
172            Eq,
173            Lt,
174            Le,
175            EqEq,
176            Gt,
177            AndAnd,
178            OrOr,
179            Bang,
180            Tilde,
181
182            Plus,
183            Minus,
184            Star,
185            And,
186            Or,
187
188            At,
189            Dot,
190            DotDot,
191            DotDotDot,
192            DotDotEq,
193            Comma,
194            Semi,
195            Colon,
196            PathSep,
197            RArrow,
198            FatArrow,
199            Pound,
200            Question,
201            OpenParen,
202            CloseParen,
203            OpenBrace,
204            CloseBrace,
205            OpenBracket,
206            CloseBracket,
207            Eof,
208
209            Operator,
210            Ident,
211            Lifetime,
212            Path,
213            Type,
214            Const,
215
216            KwAs,
217            KwAsync,
218            KwAuto,
219            KwAwait,
220            KwBecome,
221            KwBox,
222            KwBreak,
223            KwCatch,
224            KwConst,
225            KwContinue,
226            KwContractEnsures,
227            KwContractRequires,
228            KwCrate,
229            KwDefault,
230            KwDyn,
231            KwElse,
232            KwEnum,
233            KwExtern,
234            KwFn,
235            KwFor,
236            KwGen,
237            KwIf,
238            KwImpl,
239            KwIn,
240            KwLet,
241            KwLoop,
242            KwMacro,
243            KwMacroRules,
244            KwMatch,
245            KwMod,
246            KwMove,
247            KwMut,
248            KwPub,
249            KwRaw,
250            KwRef,
251            KwReturn,
252            KwReuse,
253            KwSafe,
254            KwSelfUpper,
255            KwStatic,
256            KwStruct,
257            KwSuper,
258            KwTrait,
259            KwTry,
260            KwType,
261            KwUnderscore,
262            KwUnsafe,
263            KwUse,
264            KwWhere,
265            KwWhile,
266            KwYield,
267
268            SymAttSyntax,
269            SymClobberAbi,
270            SymInlateout,
271            SymInout,
272            SymIs,
273            SymLabel,
274            SymLateout,
275            SymMayUnwind,
276            SymNomem,
277            SymNoreturn,
278            SymNostack,
279            SymNull,
280            SymOptions,
281            SymOut,
282            SymPreservesFlags,
283            SymPure,
284            SymReadonly,
285            SymSym,
286        };
287        token_type
288    }
289
290    pub(super) fn is_keyword(&self) -> Option<Symbol> {
291        match self {
292            TokenType::KwAs => Some(kw::As),
293            TokenType::KwAsync => Some(kw::Async),
294            TokenType::KwAuto => Some(kw::Auto),
295            TokenType::KwAwait => Some(kw::Await),
296            TokenType::KwBecome => Some(kw::Become),
297            TokenType::KwBox => Some(kw::Box),
298            TokenType::KwBreak => Some(kw::Break),
299            TokenType::KwCatch => Some(kw::Catch),
300            TokenType::KwConst => Some(kw::Const),
301            TokenType::KwContinue => Some(kw::Continue),
302            TokenType::KwContractEnsures => Some(kw::ContractEnsures),
303            TokenType::KwContractRequires => Some(kw::ContractRequires),
304            TokenType::KwCrate => Some(kw::Crate),
305            TokenType::KwDefault => Some(kw::Default),
306            TokenType::KwDyn => Some(kw::Dyn),
307            TokenType::KwElse => Some(kw::Else),
308            TokenType::KwEnum => Some(kw::Enum),
309            TokenType::KwExtern => Some(kw::Extern),
310            TokenType::KwFn => Some(kw::Fn),
311            TokenType::KwFor => Some(kw::For),
312            TokenType::KwGen => Some(kw::Gen),
313            TokenType::KwIf => Some(kw::If),
314            TokenType::KwImpl => Some(kw::Impl),
315            TokenType::KwIn => Some(kw::In),
316            TokenType::KwLet => Some(kw::Let),
317            TokenType::KwLoop => Some(kw::Loop),
318            TokenType::KwMacroRules => Some(kw::MacroRules),
319            TokenType::KwMacro => Some(kw::Macro),
320            TokenType::KwMatch => Some(kw::Match),
321            TokenType::KwMod => Some(kw::Mod),
322            TokenType::KwMove => Some(kw::Move),
323            TokenType::KwMut => Some(kw::Mut),
324            TokenType::KwPub => Some(kw::Pub),
325            TokenType::KwRaw => Some(kw::Raw),
326            TokenType::KwRef => Some(kw::Ref),
327            TokenType::KwReturn => Some(kw::Return),
328            TokenType::KwReuse => Some(kw::Reuse),
329            TokenType::KwSafe => Some(kw::Safe),
330            TokenType::KwSelfUpper => Some(kw::SelfUpper),
331            TokenType::KwStatic => Some(kw::Static),
332            TokenType::KwStruct => Some(kw::Struct),
333            TokenType::KwSuper => Some(kw::Super),
334            TokenType::KwTrait => Some(kw::Trait),
335            TokenType::KwTry => Some(kw::Try),
336            TokenType::KwType => Some(kw::Type),
337            TokenType::KwUnderscore => Some(kw::Underscore),
338            TokenType::KwUnsafe => Some(kw::Unsafe),
339            TokenType::KwUse => Some(kw::Use),
340            TokenType::KwWhere => Some(kw::Where),
341            TokenType::KwWhile => Some(kw::While),
342            TokenType::KwYield => Some(kw::Yield),
343
344            TokenType::SymAttSyntax => Some(sym::att_syntax),
345            TokenType::SymClobberAbi => Some(sym::clobber_abi),
346            TokenType::SymInlateout => Some(sym::inlateout),
347            TokenType::SymInout => Some(sym::inout),
348            TokenType::SymIs => Some(sym::is),
349            TokenType::SymLabel => Some(sym::label),
350            TokenType::SymLateout => Some(sym::lateout),
351            TokenType::SymMayUnwind => Some(sym::may_unwind),
352            TokenType::SymNomem => Some(sym::nomem),
353            TokenType::SymNoreturn => Some(sym::noreturn),
354            TokenType::SymNostack => Some(sym::nostack),
355            TokenType::SymNull => Some(sym::null),
356            TokenType::SymOptions => Some(sym::options),
357            TokenType::SymOut => Some(sym::out),
358            TokenType::SymPreservesFlags => Some(sym::preserves_flags),
359            TokenType::SymPure => Some(sym::pure),
360            TokenType::SymReadonly => Some(sym::readonly),
361            TokenType::SymSym => Some(sym::sym),
362            _ => None,
363        }
364    }
365
366    // The output should be the same as that produced by
367    // `rustc_ast_pretty::pprust::token_to_string`.
368    pub(super) fn to_string(&self) -> String {
369        match self {
370            TokenType::Eq => "`=`",
371            TokenType::Lt => "`<`",
372            TokenType::Le => "`<=`",
373            TokenType::EqEq => "`==`",
374            TokenType::Gt => "`>`",
375            TokenType::AndAnd => "`&&`",
376            TokenType::OrOr => "`||`",
377            TokenType::Bang => "`!`",
378            TokenType::Tilde => "`~`",
379
380            TokenType::Plus => "`+`",
381            TokenType::Minus => "`-`",
382            TokenType::Star => "`*`",
383            TokenType::And => "`&`",
384            TokenType::Or => "`|`",
385
386            TokenType::At => "`@`",
387            TokenType::Dot => "`.`",
388            TokenType::DotDot => "`..`",
389            TokenType::DotDotDot => "`...`",
390            TokenType::DotDotEq => "`..=`",
391            TokenType::Comma => "`,`",
392            TokenType::Semi => "`;`",
393            TokenType::Colon => "`:`",
394            TokenType::PathSep => "`::`",
395            TokenType::RArrow => "`->`",
396            TokenType::FatArrow => "`=>`",
397            TokenType::Pound => "`#`",
398            TokenType::Question => "`?`",
399            TokenType::OpenParen => "`(`",
400            TokenType::CloseParen => "`)`",
401            TokenType::OpenBrace => "`{`",
402            TokenType::CloseBrace => "`}`",
403            TokenType::OpenBracket => "`[`",
404            TokenType::CloseBracket => "`]`",
405            TokenType::Eof => "<eof>",
406
407            TokenType::Operator => "an operator",
408            TokenType::Ident => "identifier",
409            TokenType::Lifetime => "lifetime",
410            TokenType::Path => "path",
411            TokenType::Type => "type",
412            TokenType::Const => "a const expression",
413
414            _ => return format!("`{}`", self.is_keyword().unwrap()),
415        }
416        .to_string()
417    }
418}
419
/// Used by various `Parser` methods such as `check` and `eat`. The first field
/// is always used by those methods. The second field is only used when the
/// first field doesn't match.
#[derive(Clone, Copy, Debug)]
pub struct ExpTokenPair {
    /// The token kind handed to the `Parser` method; always used.
    pub tok: TokenKind,
    /// The `TokenType` counterpart of `tok`; only used when `tok` doesn't
    /// match.
    pub token_type: TokenType,
}
428
/// Used by various `Parser` methods such as `check_keyword` and `eat_keyword`.
/// The first field is always used by those methods. The second field is only
/// used when the first field doesn't match.
#[derive(Clone, Copy)]
pub struct ExpKeywordPair {
    /// The keyword (or keyword-like) symbol handed to the `Parser` method;
    /// always used.
    pub kw: Symbol,
    /// The `TokenType` counterpart of `kw`; only used when `kw` doesn't
    /// match.
    pub token_type: TokenType,
}
437
// Gets a statically-known `ExpTokenPair` (for non-keywords) or
// `ExpKeywordPair` (for keywords and keyword-like symbols), as used with
// various `check`/`expect` methods in `Parser`.
//
// `exp!(Name)` is looked up in the table of public rules below, each of which
// forwards to one of the internal `@tok`, `@kw` or `@sym` helper rules.
//
// Note that `exp!(Const)` and `exp!(Type)` produce the *keyword* pairs
// (`TokenType::KwConst`, `TokenType::KwType`), not `TokenType::Const` or
// `TokenType::Type`.
//
// The name is short because it's used a lot.
#[macro_export]
// We don't use the normal `#[rustfmt::skip]` here because that triggers a
// bogus "macro-expanded `macro_export` macros from the current crate cannot be
// referred to by absolute paths" error, ugh. See #52234.
#[cfg_attr(rustfmt, rustfmt::skip)]
macro_rules! exp {
    // `ExpTokenPair` helper rules.
    // The same identifier names both the `rustc_ast::token` item and the
    // `TokenType` variant.
    (@tok, $tok:ident) => {
        $crate::parser::token_type::ExpTokenPair {
            tok: rustc_ast::token::$tok,
            token_type: $crate::parser::token_type::TokenType::$tok
        }
    };

    // `ExpKeywordPair` helper rules.
    // `@kw` takes the symbol from `rustc_span::symbol::kw`, `@sym` from
    // `rustc_span::symbol::sym`.
    (@kw, $kw:ident, $token_type:ident) => {
        $crate::parser::token_type::ExpKeywordPair {
            kw: rustc_span::symbol::kw::$kw,
            token_type: $crate::parser::token_type::TokenType::$token_type,
        }
    };
    (@sym, $kw:ident, $token_type:ident) => {
        $crate::parser::token_type::ExpKeywordPair {
            kw: rustc_span::symbol::sym::$kw,
            token_type: $crate::parser::token_type::TokenType::$token_type,
        }
    };

    // Operators and structural symbols.
    (Eq)             => { exp!(@tok, Eq) };
    (Lt)             => { exp!(@tok, Lt) };
    (Le)             => { exp!(@tok, Le) };
    (EqEq)           => { exp!(@tok, EqEq) };
    (Gt)             => { exp!(@tok, Gt) };
    (AndAnd)         => { exp!(@tok, AndAnd) };
    (OrOr)           => { exp!(@tok, OrOr) };
    (Bang)           => { exp!(@tok, Bang) };
    (Tilde)          => { exp!(@tok, Tilde) };
    (Plus)           => { exp!(@tok, Plus) };
    (Minus)          => { exp!(@tok, Minus) };
    (Star)           => { exp!(@tok, Star) };
    (And)            => { exp!(@tok, And) };
    (Or)             => { exp!(@tok, Or) };
    (At)             => { exp!(@tok, At) };
    (Dot)            => { exp!(@tok, Dot) };
    (DotDot)         => { exp!(@tok, DotDot) };
    (DotDotDot)      => { exp!(@tok, DotDotDot) };
    (DotDotEq)       => { exp!(@tok, DotDotEq) };
    (Comma)          => { exp!(@tok, Comma) };
    (Semi)           => { exp!(@tok, Semi) };
    (Colon)          => { exp!(@tok, Colon) };
    (PathSep)        => { exp!(@tok, PathSep) };
    (RArrow)         => { exp!(@tok, RArrow) };
    (FatArrow)       => { exp!(@tok, FatArrow) };
    (Pound)          => { exp!(@tok, Pound) };
    (Question)       => { exp!(@tok, Question) };
    (Eof)            => { exp!(@tok, Eof) };

    // Delimiters.
    (OpenParen)      => { exp!(@tok, OpenParen) };
    (OpenBrace)      => { exp!(@tok, OpenBrace) };
    (OpenBracket)    => { exp!(@tok, OpenBracket) };
    (CloseParen)     => { exp!(@tok, CloseParen) };
    (CloseBrace)     => { exp!(@tok, CloseBrace) };
    (CloseBracket)   => { exp!(@tok, CloseBracket) };

    // Keywords.
    (As)             => { exp!(@kw, As,         KwAs) };
    (Async)          => { exp!(@kw, Async,      KwAsync) };
    (Auto)           => { exp!(@kw, Auto,       KwAuto) };
    (Await)          => { exp!(@kw, Await,      KwAwait) };
    (Become)         => { exp!(@kw, Become,     KwBecome) };
    (Box)            => { exp!(@kw, Box,        KwBox) };
    (Break)          => { exp!(@kw, Break,      KwBreak) };
    (Catch)          => { exp!(@kw, Catch,      KwCatch) };
    (Const)          => { exp!(@kw, Const,      KwConst) };
    (Continue)       => { exp!(@kw, Continue,   KwContinue) };
    (ContractEnsures)  => { exp!(@kw, ContractEnsures, KwContractEnsures) };
    (ContractRequires) => { exp!(@kw, ContractRequires, KwContractRequires) };
    (Crate)          => { exp!(@kw, Crate,      KwCrate) };
    (Default)        => { exp!(@kw, Default,    KwDefault) };
    (Dyn)            => { exp!(@kw, Dyn,        KwDyn) };
    (Else)           => { exp!(@kw, Else,       KwElse) };
    (Enum)           => { exp!(@kw, Enum,       KwEnum) };
    (Extern)         => { exp!(@kw, Extern,     KwExtern) };
    (Fn)             => { exp!(@kw, Fn,         KwFn) };
    (For)            => { exp!(@kw, For,        KwFor) };
    (Gen)            => { exp!(@kw, Gen,        KwGen) };
    (If)             => { exp!(@kw, If,         KwIf) };
    (Impl)           => { exp!(@kw, Impl,       KwImpl) };
    (In)             => { exp!(@kw, In,         KwIn) };
    (Let)            => { exp!(@kw, Let,        KwLet) };
    (Loop)           => { exp!(@kw, Loop,       KwLoop) };
    (Macro)          => { exp!(@kw, Macro,      KwMacro) };
    (MacroRules)     => { exp!(@kw, MacroRules, KwMacroRules) };
    (Match)          => { exp!(@kw, Match,      KwMatch) };
    (Mod)            => { exp!(@kw, Mod,        KwMod) };
    (Move)           => { exp!(@kw, Move,       KwMove) };
    (Mut)            => { exp!(@kw, Mut,        KwMut) };
    (Pub)            => { exp!(@kw, Pub,        KwPub) };
    (Raw)            => { exp!(@kw, Raw,        KwRaw) };
    (Ref)            => { exp!(@kw, Ref,        KwRef) };
    (Return)         => { exp!(@kw, Return,     KwReturn) };
    (Reuse)          => { exp!(@kw, Reuse,      KwReuse) };
    (Safe)           => { exp!(@kw, Safe,       KwSafe) };
    (SelfUpper)      => { exp!(@kw, SelfUpper,  KwSelfUpper) };
    (Static)         => { exp!(@kw, Static,     KwStatic) };
    (Struct)         => { exp!(@kw, Struct,     KwStruct) };
    (Super)          => { exp!(@kw, Super,      KwSuper) };
    (Trait)          => { exp!(@kw, Trait,      KwTrait) };
    (Try)            => { exp!(@kw, Try,        KwTry) };
    (Type)           => { exp!(@kw, Type,       KwType) };
    (Underscore)     => { exp!(@kw, Underscore, KwUnderscore) };
    (Unsafe)         => { exp!(@kw, Unsafe,     KwUnsafe) };
    (Use)            => { exp!(@kw, Use,        KwUse) };
    (Where)          => { exp!(@kw, Where,      KwWhere) };
    (While)          => { exp!(@kw, While,      KwWhile) };
    (Yield)          => { exp!(@kw, Yield,      KwYield) };

    // Keyword-like symbols.
    (AttSyntax)      => { exp!(@sym, att_syntax,      SymAttSyntax) };
    (Bikeshed)       => { exp!(@sym, bikeshed,        SymBikeshed) };
    (ClobberAbi)     => { exp!(@sym, clobber_abi,     SymClobberAbi) };
    (Inlateout)      => { exp!(@sym, inlateout,       SymInlateout) };
    (Inout)          => { exp!(@sym, inout,           SymInout) };
    (Is)             => { exp!(@sym, is,              SymIs) };
    (Label)          => { exp!(@sym, label,           SymLabel) };
    (Lateout)        => { exp!(@sym, lateout,         SymLateout) };
    (MayUnwind)      => { exp!(@sym, may_unwind,      SymMayUnwind) };
    (Nomem)          => { exp!(@sym, nomem,           SymNomem) };
    (Noreturn)       => { exp!(@sym, noreturn,        SymNoreturn) };
    (Nostack)        => { exp!(@sym, nostack,         SymNostack) };
    (Null)           => { exp!(@sym, null,            SymNull) };
    (Options)        => { exp!(@sym, options,         SymOptions) };
    (Out)            => { exp!(@sym, out,             SymOut) };
    (Pin)            => { exp!(@sym, pin,             SymPin) };
    (PreservesFlags) => { exp!(@sym, preserves_flags, SymPreservesFlags) };
    (Pure)           => { exp!(@sym, pure,            SymPure) };
    (Readonly)       => { exp!(@sym, readonly,        SymReadonly) };
    (Sym)            => { exp!(@sym, sym,             SymSym) };
}
580
581/// A bitset type designed specifically for `Parser::expected_token_types`,
582/// which is very hot. `u128` is the smallest integer that will fit every
583/// `TokenType` value.
584#[derive(Clone, Copy)]
585pub(super) struct TokenTypeSet(u128);
586
587impl TokenTypeSet {
588    pub(super) fn new() -> TokenTypeSet {
589        TokenTypeSet(0)
590    }
591
592    pub(super) fn is_empty(&self) -> bool {
593        self.0 == 0
594    }
595
596    pub(super) fn insert(&mut self, token_type: TokenType) {
597        self.0 = self.0 | (1u128 << token_type as u32)
598    }
599
600    pub(super) fn clear(&mut self) {
601        self.0 = 0
602    }
603
604    pub(super) fn contains(&self, token_type: TokenType) -> bool {
605        self.0 & (1u128 << token_type as u32) != 0
606    }
607
608    pub(super) fn iter(&self) -> TokenTypeSetIter {
609        TokenTypeSetIter(*self)
610    }
611}
612
613// The `TokenTypeSet` is a copy of the set being iterated. It initially holds
614// the entire set. Each bit is cleared as it is returned. We have finished once
615// it is all zeroes.
616pub(super) struct TokenTypeSetIter(TokenTypeSet);
617
618impl Iterator for TokenTypeSetIter {
619    type Item = TokenType;
620
621    fn next(&mut self) -> Option<TokenType> {
622        let num_bits: u32 = (size_of_val(&self.0.0) * 8) as u32;
623        assert_eq!(num_bits, 128);
624        let z = self.0.0.trailing_zeros();
625        if z == num_bits {
626            None
627        } else {
628            self.0.0 &= !(1 << z); // clear the trailing 1 bit
629            Some(TokenType::from_u32(z))
630        }
631    }
632}