rustc_parse/parser/
token_type.rs

1use rustc_ast::token::TokenKind;
2use rustc_span::symbol::{Symbol, kw, sym};
3
/// Used in "expected"/"expected one of" error messages. Tokens are added here
/// as necessary. Tokens with values (e.g. literals, identifiers) are
/// represented by a single variant (e.g. `Literal`, `Ident`).
///
/// It's an awkward representation, but it's important for performance. It's a
/// C-style parameterless enum so that `TokenTypeSet` can be a bitset. This is
/// important because `Parser::expected_token_types` is very hot. `TokenType`
/// used to have variants with parameters (e.g. all the keywords were in a
/// single `Keyword` variant with a `Symbol` parameter) and
/// `Parser::expected_token_types` was a `Vec<TokenType>` which was much slower
/// to manipulate.
///
/// We really want to keep the number of variants to 128 or fewer, so that
/// `TokenTypeSet` can be implemented with a `u128`.
///
/// The discriminants are left implicit (0, 1, 2, ... in declaration order) and
/// are used directly as bit positions by `TokenTypeSet`. When adding a variant,
/// also add it to the list in `TokenType::from_u32` (which panics on values it
/// does not know about) and, where applicable, to `TokenType::is_keyword`,
/// `TokenType::to_string` and the `exp!` macro.
// `Eq` is derived alongside `PartialEq`: equality on a fieldless enum is total,
// and this lets the type be used wherever an `Eq` bound is required.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenType {
    // Expression-operator symbols
    Eq,
    Lt,
    Le,
    EqEq,
    Gt,
    AndAnd,
    OrOr,
    Bang,
    Tilde,

    // BinOps
    Plus,
    Minus,
    Star,
    And,
    Or,

    // Structural symbols
    At,
    Dot,
    DotDot,
    DotDotDot,
    DotDotEq,
    Comma,
    Semi,
    Colon,
    PathSep,
    RArrow,
    FatArrow,
    Pound,
    Question,
    OpenParen,
    CloseParen,
    OpenBrace,
    CloseBrace,
    OpenBracket,
    CloseBracket,
    Eof,

    // Token types with some details elided.
    /// Any operator.
    Operator,
    /// Any identifier token.
    Ident,
    /// Any lifetime token.
    Lifetime,
    /// Any token that can start a path.
    Path,
    /// Any token that can start a type.
    Type,
    /// Any token that can start a const expression.
    Const,

    // Keywords
    // tidy-alphabetical-start
    KwAs,
    KwAsync,
    KwAuto,
    KwAwait,
    KwBecome,
    KwBox,
    KwBreak,
    KwCatch,
    KwConst,
    KwContinue,
    KwContractEnsures,
    KwContractRequires,
    KwCrate,
    KwDefault,
    KwDyn,
    KwElse,
    KwEnum,
    KwExtern,
    KwFn,
    KwFor,
    KwGen,
    KwIf,
    KwImpl,
    KwIn,
    KwLet,
    KwLoop,
    KwMacro,
    KwMacroRules,
    KwMatch,
    KwMod,
    KwMove,
    KwMut,
    KwPub,
    KwRaw,
    KwRef,
    KwReturn,
    KwReuse,
    KwSafe,
    KwSelfUpper,
    KwStatic,
    KwStruct,
    KwSuper,
    KwTrait,
    KwTry,
    KwType,
    KwUnderscore,
    KwUnsafe,
    KwUse,
    KwWhere,
    KwWhile,
    KwYield,
    // tidy-alphabetical-end

    // Keyword-like symbols.
    // tidy-alphabetical-start
    SymAttSyntax,
    SymClobberAbi,
    SymInlateout,
    SymInout,
    SymIs,
    SymLabel,
    SymLateout,
    SymMayUnwind,
    SymNomem,
    SymNoreturn,
    SymNostack,
    SymOptions,
    SymOut,
    SymPreservesFlags,
    SymPure,
    SymReadonly,
    SymSym,
    // tidy-alphabetical-end
}
150
// Boilerplate-saving macro: given a `u32` variable and a comma-terminated list
// of `TokenType` variant names, expands to a `match` that yields the variant
// whose discriminant equals the variable, and panics if none does.
macro_rules! from_u32_match {
    ($raw:ident; $($variant:ident,)+) => {
        // Writing `0 => TokenType::Eq` and so on would be the obvious way to
        // do this. Comparing against `TokenType::$variant as u32` in a guard
        // instead means no integer literal has to be kept in sync with the
        // enum declaration.
        match $raw {
            $( n if TokenType::$variant as u32 == n => TokenType::$variant, )+
            _ => panic!("unhandled value: {}", $raw),
        }
    };
}
165
166impl TokenType {
167    fn from_u32(val: u32) -> TokenType {
168        let token_type = from_u32_match! { val;
169            Eq,
170            Lt,
171            Le,
172            EqEq,
173            Gt,
174            AndAnd,
175            OrOr,
176            Bang,
177            Tilde,
178
179            Plus,
180            Minus,
181            Star,
182            And,
183            Or,
184
185            At,
186            Dot,
187            DotDot,
188            DotDotDot,
189            DotDotEq,
190            Comma,
191            Semi,
192            Colon,
193            PathSep,
194            RArrow,
195            FatArrow,
196            Pound,
197            Question,
198            OpenParen,
199            CloseParen,
200            OpenBrace,
201            CloseBrace,
202            OpenBracket,
203            CloseBracket,
204            Eof,
205
206            Operator,
207            Ident,
208            Lifetime,
209            Path,
210            Type,
211            Const,
212
213            KwAs,
214            KwAsync,
215            KwAuto,
216            KwAwait,
217            KwBecome,
218            KwBox,
219            KwBreak,
220            KwCatch,
221            KwConst,
222            KwContinue,
223            KwContractEnsures,
224            KwContractRequires,
225            KwCrate,
226            KwDefault,
227            KwDyn,
228            KwElse,
229            KwEnum,
230            KwExtern,
231            KwFn,
232            KwFor,
233            KwGen,
234            KwIf,
235            KwImpl,
236            KwIn,
237            KwLet,
238            KwLoop,
239            KwMacro,
240            KwMacroRules,
241            KwMatch,
242            KwMod,
243            KwMove,
244            KwMut,
245            KwPub,
246            KwRaw,
247            KwRef,
248            KwReturn,
249            KwReuse,
250            KwSafe,
251            KwSelfUpper,
252            KwStatic,
253            KwStruct,
254            KwSuper,
255            KwTrait,
256            KwTry,
257            KwType,
258            KwUnderscore,
259            KwUnsafe,
260            KwUse,
261            KwWhere,
262            KwWhile,
263            KwYield,
264
265            SymAttSyntax,
266            SymClobberAbi,
267            SymInlateout,
268            SymInout,
269            SymIs,
270            SymLabel,
271            SymLateout,
272            SymMayUnwind,
273            SymNomem,
274            SymNoreturn,
275            SymNostack,
276            SymOptions,
277            SymOut,
278            SymPreservesFlags,
279            SymPure,
280            SymReadonly,
281            SymSym,
282        };
283        token_type
284    }
285
286    pub(super) fn is_keyword(&self) -> Option<Symbol> {
287        match self {
288            TokenType::KwAs => Some(kw::As),
289            TokenType::KwAsync => Some(kw::Async),
290            TokenType::KwAuto => Some(kw::Auto),
291            TokenType::KwAwait => Some(kw::Await),
292            TokenType::KwBecome => Some(kw::Become),
293            TokenType::KwBox => Some(kw::Box),
294            TokenType::KwBreak => Some(kw::Break),
295            TokenType::KwCatch => Some(kw::Catch),
296            TokenType::KwConst => Some(kw::Const),
297            TokenType::KwContinue => Some(kw::Continue),
298            TokenType::KwContractEnsures => Some(kw::ContractEnsures),
299            TokenType::KwContractRequires => Some(kw::ContractRequires),
300            TokenType::KwCrate => Some(kw::Crate),
301            TokenType::KwDefault => Some(kw::Default),
302            TokenType::KwDyn => Some(kw::Dyn),
303            TokenType::KwElse => Some(kw::Else),
304            TokenType::KwEnum => Some(kw::Enum),
305            TokenType::KwExtern => Some(kw::Extern),
306            TokenType::KwFn => Some(kw::Fn),
307            TokenType::KwFor => Some(kw::For),
308            TokenType::KwGen => Some(kw::Gen),
309            TokenType::KwIf => Some(kw::If),
310            TokenType::KwImpl => Some(kw::Impl),
311            TokenType::KwIn => Some(kw::In),
312            TokenType::KwLet => Some(kw::Let),
313            TokenType::KwLoop => Some(kw::Loop),
314            TokenType::KwMacroRules => Some(kw::MacroRules),
315            TokenType::KwMacro => Some(kw::Macro),
316            TokenType::KwMatch => Some(kw::Match),
317            TokenType::KwMod => Some(kw::Mod),
318            TokenType::KwMove => Some(kw::Move),
319            TokenType::KwMut => Some(kw::Mut),
320            TokenType::KwPub => Some(kw::Pub),
321            TokenType::KwRaw => Some(kw::Raw),
322            TokenType::KwRef => Some(kw::Ref),
323            TokenType::KwReturn => Some(kw::Return),
324            TokenType::KwReuse => Some(kw::Reuse),
325            TokenType::KwSafe => Some(kw::Safe),
326            TokenType::KwSelfUpper => Some(kw::SelfUpper),
327            TokenType::KwStatic => Some(kw::Static),
328            TokenType::KwStruct => Some(kw::Struct),
329            TokenType::KwSuper => Some(kw::Super),
330            TokenType::KwTrait => Some(kw::Trait),
331            TokenType::KwTry => Some(kw::Try),
332            TokenType::KwType => Some(kw::Type),
333            TokenType::KwUnderscore => Some(kw::Underscore),
334            TokenType::KwUnsafe => Some(kw::Unsafe),
335            TokenType::KwUse => Some(kw::Use),
336            TokenType::KwWhere => Some(kw::Where),
337            TokenType::KwWhile => Some(kw::While),
338            TokenType::KwYield => Some(kw::Yield),
339
340            TokenType::SymAttSyntax => Some(sym::att_syntax),
341            TokenType::SymClobberAbi => Some(sym::clobber_abi),
342            TokenType::SymInlateout => Some(sym::inlateout),
343            TokenType::SymInout => Some(sym::inout),
344            TokenType::SymIs => Some(sym::is),
345            TokenType::SymLabel => Some(sym::label),
346            TokenType::SymLateout => Some(sym::lateout),
347            TokenType::SymMayUnwind => Some(sym::may_unwind),
348            TokenType::SymNomem => Some(sym::nomem),
349            TokenType::SymNoreturn => Some(sym::noreturn),
350            TokenType::SymNostack => Some(sym::nostack),
351            TokenType::SymOptions => Some(sym::options),
352            TokenType::SymOut => Some(sym::out),
353            TokenType::SymPreservesFlags => Some(sym::preserves_flags),
354            TokenType::SymPure => Some(sym::pure),
355            TokenType::SymReadonly => Some(sym::readonly),
356            TokenType::SymSym => Some(sym::sym),
357            _ => None,
358        }
359    }
360
361    // The output should be the same as that produced by
362    // `rustc_ast_pretty::pprust::token_to_string`.
363    pub(super) fn to_string(&self) -> String {
364        match self {
365            TokenType::Eq => "`=`",
366            TokenType::Lt => "`<`",
367            TokenType::Le => "`<=`",
368            TokenType::EqEq => "`==`",
369            TokenType::Gt => "`>`",
370            TokenType::AndAnd => "`&&`",
371            TokenType::OrOr => "`||`",
372            TokenType::Bang => "`!`",
373            TokenType::Tilde => "`~`",
374
375            TokenType::Plus => "`+`",
376            TokenType::Minus => "`-`",
377            TokenType::Star => "`*`",
378            TokenType::And => "`&`",
379            TokenType::Or => "`|`",
380
381            TokenType::At => "`@`",
382            TokenType::Dot => "`.`",
383            TokenType::DotDot => "`..`",
384            TokenType::DotDotDot => "`...`",
385            TokenType::DotDotEq => "`..=`",
386            TokenType::Comma => "`,`",
387            TokenType::Semi => "`;`",
388            TokenType::Colon => "`:`",
389            TokenType::PathSep => "`::`",
390            TokenType::RArrow => "`->`",
391            TokenType::FatArrow => "`=>`",
392            TokenType::Pound => "`#`",
393            TokenType::Question => "`?`",
394            TokenType::OpenParen => "`(`",
395            TokenType::CloseParen => "`)`",
396            TokenType::OpenBrace => "`{`",
397            TokenType::CloseBrace => "`}`",
398            TokenType::OpenBracket => "`[`",
399            TokenType::CloseBracket => "`]`",
400            TokenType::Eof => "<eof>",
401
402            TokenType::Operator => "an operator",
403            TokenType::Ident => "identifier",
404            TokenType::Lifetime => "lifetime",
405            TokenType::Path => "path",
406            TokenType::Type => "type",
407            TokenType::Const => "a const expression",
408
409            _ => return format!("`{}`", self.is_keyword().unwrap()),
410        }
411        .to_string()
412    }
413}
414
/// Used by various `Parser` methods such as `check` and `eat`. The first field
/// is always used by those methods. The second field is only used when the
/// first field doesn't match.
///
/// Values are normally obtained through the `exp!` macro, which pairs a
/// `rustc_ast::token` item with the `TokenType` variant of the same name.
#[derive(Clone, Copy, Debug)]
pub struct ExpTokenPair<'a> {
    /// The token kind being looked for.
    pub tok: &'a TokenKind,
    /// The `TokenType` counterpart of `tok`.
    pub token_type: TokenType,
}
423
424/// Used by various `Parser` methods such as `check_keyword` and `eat_keyword`.
425/// The first field is always used by those methods. The second field is only
426/// used when the first field doesn't match.
427#[derive(Clone, Copy)]
428pub struct ExpKeywordPair {
429    pub kw: Symbol,
430    pub token_type: TokenType,
431}
432
// Gets a statically-known `ExpTokenPair` (for non-keywords) or
// `ExpKeywordPair` (for keywords and keyword-like symbols), as used with
// various `check`/`expect` methods in `Parser`.
//
// For example, `exp!(Comma)` expands to an `ExpTokenPair` holding
// `&rustc_ast::token::Comma` and `TokenType::Comma`, while `exp!(As)` expands
// to an `ExpKeywordPair` holding `kw::As` and `TokenType::KwAs`.
//
// The name is short because it's used a lot.
#[macro_export]
// We don't use the normal `#[rustfmt::skip]` here because that triggers a
// bogus "macro-expanded `macro_export` macros from the current crate cannot be
// referred to by absolute paths" error, ugh. See #52234.
#[cfg_attr(rustfmt, rustfmt::skip)]
macro_rules! exp {
    // `ExpTokenPair` helper rules. The same identifier names both the
    // `rustc_ast::token` item and the `TokenType` variant.
    (@tok, $tok:ident) => {
        $crate::parser::token_type::ExpTokenPair {
            tok: &rustc_ast::token::$tok,
            token_type: $crate::parser::token_type::TokenType::$tok
        }
    };

    // `ExpKeywordPair` helper rules. `@kw` looks the symbol up in
    // `rustc_span::symbol::kw`, `@sym` in `rustc_span::symbol::sym`.
    (@kw, $kw:ident, $token_type:ident) => {
        $crate::parser::token_type::ExpKeywordPair {
            kw: rustc_span::symbol::kw::$kw,
            token_type: $crate::parser::token_type::TokenType::$token_type,
        }
    };
    (@sym, $kw:ident, $token_type:ident) => {
        $crate::parser::token_type::ExpKeywordPair {
            kw: rustc_span::symbol::sym::$kw,
            token_type: $crate::parser::token_type::TokenType::$token_type,
        }
    };

    // Operator and structural tokens.
    (Eq)             => { exp!(@tok, Eq) };
    (Lt)             => { exp!(@tok, Lt) };
    (Le)             => { exp!(@tok, Le) };
    (EqEq)           => { exp!(@tok, EqEq) };
    (Gt)             => { exp!(@tok, Gt) };
    (AndAnd)         => { exp!(@tok, AndAnd) };
    (OrOr)           => { exp!(@tok, OrOr) };
    (Bang)           => { exp!(@tok, Bang) };
    (Tilde)          => { exp!(@tok, Tilde) };
    (Plus)           => { exp!(@tok, Plus) };
    (Minus)          => { exp!(@tok, Minus) };
    (Star)           => { exp!(@tok, Star) };
    (And)            => { exp!(@tok, And) };
    (Or)             => { exp!(@tok, Or) };
    (At)             => { exp!(@tok, At) };
    (Dot)            => { exp!(@tok, Dot) };
    (DotDot)         => { exp!(@tok, DotDot) };
    (DotDotDot)      => { exp!(@tok, DotDotDot) };
    (DotDotEq)       => { exp!(@tok, DotDotEq) };
    (Comma)          => { exp!(@tok, Comma) };
    (Semi)           => { exp!(@tok, Semi) };
    (Colon)          => { exp!(@tok, Colon) };
    (PathSep)        => { exp!(@tok, PathSep) };
    (RArrow)         => { exp!(@tok, RArrow) };
    (FatArrow)       => { exp!(@tok, FatArrow) };
    (Pound)          => { exp!(@tok, Pound) };
    (Question)       => { exp!(@tok, Question) };
    (Eof)            => { exp!(@tok, Eof) };

    // Delimiters.
    (OpenParen)      => { exp!(@tok, OpenParen) };
    (OpenBrace)      => { exp!(@tok, OpenBrace) };
    (OpenBracket)    => { exp!(@tok, OpenBracket) };
    (CloseParen)     => { exp!(@tok, CloseParen) };
    (CloseBrace)     => { exp!(@tok, CloseBrace) };
    (CloseBracket)   => { exp!(@tok, CloseBracket) };

    // Keywords: `kw::X` paired with `TokenType::KwX`.
    (As)             => { exp!(@kw, As,         KwAs) };
    (Async)          => { exp!(@kw, Async,      KwAsync) };
    (Auto)           => { exp!(@kw, Auto,       KwAuto) };
    (Await)          => { exp!(@kw, Await,      KwAwait) };
    (Become)         => { exp!(@kw, Become,     KwBecome) };
    (Box)            => { exp!(@kw, Box,        KwBox) };
    (Break)          => { exp!(@kw, Break,      KwBreak) };
    (Catch)          => { exp!(@kw, Catch,      KwCatch) };
    (Const)          => { exp!(@kw, Const,      KwConst) };
    (Continue)       => { exp!(@kw, Continue,   KwContinue) };
    (ContractEnsures)  => { exp!(@kw, ContractEnsures, KwContractEnsures) };
    (ContractRequires) => { exp!(@kw, ContractRequires, KwContractRequires) };
    (Crate)          => { exp!(@kw, Crate,      KwCrate) };
    (Default)        => { exp!(@kw, Default,    KwDefault) };
    (Dyn)            => { exp!(@kw, Dyn,        KwDyn) };
    (Else)           => { exp!(@kw, Else,       KwElse) };
    (Enum)           => { exp!(@kw, Enum,       KwEnum) };
    (Extern)         => { exp!(@kw, Extern,     KwExtern) };
    (Fn)             => { exp!(@kw, Fn,         KwFn) };
    (For)            => { exp!(@kw, For,        KwFor) };
    (Gen)            => { exp!(@kw, Gen,        KwGen) };
    (If)             => { exp!(@kw, If,         KwIf) };
    (Impl)           => { exp!(@kw, Impl,       KwImpl) };
    (In)             => { exp!(@kw, In,         KwIn) };
    (Let)            => { exp!(@kw, Let,        KwLet) };
    (Loop)           => { exp!(@kw, Loop,       KwLoop) };
    (Macro)          => { exp!(@kw, Macro,      KwMacro) };
    (MacroRules)     => { exp!(@kw, MacroRules, KwMacroRules) };
    (Match)          => { exp!(@kw, Match,      KwMatch) };
    (Mod)            => { exp!(@kw, Mod,        KwMod) };
    (Move)           => { exp!(@kw, Move,       KwMove) };
    (Mut)            => { exp!(@kw, Mut,        KwMut) };
    (Pub)            => { exp!(@kw, Pub,        KwPub) };
    (Raw)            => { exp!(@kw, Raw,        KwRaw) };
    (Ref)            => { exp!(@kw, Ref,        KwRef) };
    (Return)         => { exp!(@kw, Return,     KwReturn) };
    (Reuse)          => { exp!(@kw, Reuse,      KwReuse) };
    (Safe)           => { exp!(@kw, Safe,       KwSafe) };
    (SelfUpper)      => { exp!(@kw, SelfUpper,  KwSelfUpper) };
    (Static)         => { exp!(@kw, Static,     KwStatic) };
    (Struct)         => { exp!(@kw, Struct,     KwStruct) };
    (Super)          => { exp!(@kw, Super,      KwSuper) };
    (Trait)          => { exp!(@kw, Trait,      KwTrait) };
    (Try)            => { exp!(@kw, Try,        KwTry) };
    (Type)           => { exp!(@kw, Type,       KwType) };
    (Underscore)     => { exp!(@kw, Underscore, KwUnderscore) };
    (Unsafe)         => { exp!(@kw, Unsafe,     KwUnsafe) };
    (Use)            => { exp!(@kw, Use,        KwUse) };
    (Where)          => { exp!(@kw, Where,      KwWhere) };
    (While)          => { exp!(@kw, While,      KwWhile) };
    (Yield)          => { exp!(@kw, Yield,      KwYield) };

    // Keyword-like symbols: `sym::x` paired with `TokenType::SymX`.
    (AttSyntax)      => { exp!(@sym, att_syntax,      SymAttSyntax) };
    (ClobberAbi)     => { exp!(@sym, clobber_abi,     SymClobberAbi) };
    (Inlateout)      => { exp!(@sym, inlateout,       SymInlateout) };
    (Inout)          => { exp!(@sym, inout,           SymInout) };
    (Is)             => { exp!(@sym, is,              SymIs) };
    (Label)          => { exp!(@sym, label,           SymLabel) };
    (Lateout)        => { exp!(@sym, lateout,         SymLateout) };
    (MayUnwind)      => { exp!(@sym, may_unwind,      SymMayUnwind) };
    (Nomem)          => { exp!(@sym, nomem,           SymNomem) };
    (Noreturn)       => { exp!(@sym, noreturn,        SymNoreturn) };
    (Nostack)        => { exp!(@sym, nostack,         SymNostack) };
    (Options)        => { exp!(@sym, options,         SymOptions) };
    (Out)            => { exp!(@sym, out,             SymOut) };
    (PreservesFlags) => { exp!(@sym, preserves_flags, SymPreservesFlags) };
    (Pure)           => { exp!(@sym, pure,            SymPure) };
    (Readonly)       => { exp!(@sym, readonly,        SymReadonly) };
    (Sym)            => { exp!(@sym, sym,             SymSym) };
}
572
/// A bitset type designed specifically for `Parser::expected_token_types`,
/// which is very hot. `u128` is the smallest integer that will fit every
/// `TokenType` value.
///
/// Bit `n` of the wrapped integer is set exactly when the `TokenType` whose
/// discriminant is `n` is a member of the set.
#[derive(Clone, Copy)]
pub(super) struct TokenTypeSet(u128);
578
579impl TokenTypeSet {
580    pub(super) fn new() -> TokenTypeSet {
581        TokenTypeSet(0)
582    }
583
584    pub(super) fn is_empty(&self) -> bool {
585        self.0 == 0
586    }
587
588    pub(super) fn insert(&mut self, token_type: TokenType) {
589        self.0 = self.0 | (1u128 << token_type as u32)
590    }
591
592    pub(super) fn clear(&mut self) {
593        self.0 = 0
594    }
595
596    pub(super) fn contains(&self, token_type: TokenType) -> bool {
597        self.0 & (1u128 << token_type as u32) != 0
598    }
599
600    pub(super) fn iter(&self) -> TokenTypeSetIter {
601        TokenTypeSetIter(*self)
602    }
603}
604
// Iterator over the members of a `TokenTypeSet`, as returned by
// `TokenTypeSet::iter`.
//
// The `TokenTypeSet` is a copy of the set being iterated. It initially holds
// the entire set. Each bit is cleared as it is returned. We have finished once
// it is all zeroes.
pub(super) struct TokenTypeSetIter(TokenTypeSet);
609
610impl Iterator for TokenTypeSetIter {
611    type Item = TokenType;
612
613    fn next(&mut self) -> Option<TokenType> {
614        let num_bits: u32 = (size_of_val(&self.0.0) * 8) as u32;
615        assert_eq!(num_bits, 128);
616        let z = self.0.0.trailing_zeros();
617        if z == num_bits {
618            None
619        } else {
620            self.0.0 &= !(1 << z); // clear the trailing 1 bit
621            Some(TokenType::from_u32(z))
622        }
623    }
624}