rustc_parse/parser/
token_type.rs

1use rustc_ast::token::TokenKind;
2use rustc_span::symbol::{Symbol, kw, sym};
3
/// Used in "expected"/"expected one of" error messages. Tokens are added here
/// as necessary. Tokens with values (e.g. literals, identifiers) are
/// represented by a single variant (e.g. `Literal`, `Ident`).
///
/// It's an awkward representation, but it's important for performance. It's a
/// C-style parameterless enum so that `TokenTypeSet` can be a bitset. This is
/// important because `Parser::expected_token_types` is very hot. `TokenType`
/// used to have variants with parameters (e.g. all the keywords were in a
/// single `Keyword` variant with a `Symbol` parameter) and
/// `Parser::expected_token_types` was a `Vec<TokenType>` which was much slower
/// to manipulate.
///
/// We really want to keep the number of variants to 128 or fewer, so that
/// `TokenTypeSet` can be implemented with a `u128`.
///
/// Declaration order is significant: a variant's discriminant (`as u32`) is
/// the bit position it occupies in `TokenTypeSet`, and `TokenType::from_u32`
/// maps that position back to the variant.
//
// When adding a variant, keep the rest of this file in sync:
// - list it in `TokenType::from_u32`, which panics on any value it does not
//   recognise;
// - give it an arm in `TokenType::to_string`, or (for keywords and
//   keyword-like symbols) in `TokenType::is_keyword`, because `to_string`
//   unwraps `is_keyword()` for every variant it does not list explicitly;
// - add a rule to the `exp!` macro if it should be usable from there.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum TokenType {
    // Expression-operator symbols
    Eq,
    Lt,
    Le,
    EqEq,
    Gt,
    AndAnd,
    OrOr,
    Not,
    Tilde,

    // BinOps
    Plus,
    Minus,
    Star,
    And,
    Or,

    // Structural symbols
    At,
    Dot,
    DotDot,
    DotDotDot,
    DotDotEq,
    Comma,
    Semi,
    Colon,
    PathSep,
    RArrow,
    FatArrow,
    Pound,
    Question,
    OpenParen,
    CloseParen,
    OpenBrace,
    CloseBrace,
    OpenBracket,
    CloseBracket,
    Eof,

    // Token types with some details elided.
    /// Any operator.
    Operator,
    /// Any identifier token.
    Ident,
    /// Any lifetime token.
    Lifetime,
    /// Any token that can start a path.
    Path,
    /// Any token that can start a type.
    Type,
    /// Any token that can start a const expression.
    Const,

    // Keywords. Each of these maps to a `kw::*` symbol in
    // `TokenType::is_keyword`.
    // tidy-alphabetical-start
    KwAs,
    KwAsync,
    KwAuto,
    KwAwait,
    KwBecome,
    KwBox,
    KwBreak,
    KwCatch,
    KwConst,
    KwContinue,
    KwContractEnsures,
    KwContractRequires,
    KwCrate,
    KwDefault,
    KwDyn,
    KwElse,
    KwEnum,
    KwExtern,
    KwFn,
    KwFor,
    KwGen,
    KwIf,
    KwImpl,
    KwIn,
    KwLet,
    KwLoop,
    KwMacro,
    KwMacroRules,
    KwMatch,
    KwMod,
    KwMove,
    KwMut,
    KwPub,
    KwRaw,
    KwRef,
    KwReturn,
    KwReuse,
    KwSafe,
    KwSelfUpper,
    KwStatic,
    KwStruct,
    KwTrait,
    KwTry,
    KwType,
    KwUnderscore,
    KwUnsafe,
    KwUse,
    KwWhere,
    KwWhile,
    KwYield,
    // tidy-alphabetical-end

    // Keyword-like symbols. Each of these maps to a `sym::*` symbol in
    // `TokenType::is_keyword`.
    // tidy-alphabetical-start
    SymAttSyntax,
    SymClobberAbi,
    SymInlateout,
    SymInout,
    SymIs,
    SymLabel,
    SymLateout,
    SymMayUnwind,
    SymNomem,
    SymNoreturn,
    SymNostack,
    SymOptions,
    SymOut,
    SymPreservesFlags,
    SymPure,
    SymReadonly,
    SymSym,
    // tidy-alphabetical-end
}
149
// Expands to a `match` that turns the `u32` named by `$val` back into a
// `TokenType`, given the list of variant names to recognise. Saves writing
// one hand-numbered arm per variant.
macro_rules! from_u32_match {
    ($val:ident; $($variant:ident,)+) => {
        // Writing `0 => TokenType::Eq` etc. would be more direct, but
        // comparing against `TokenType::X as u32` in a guard means no
        // variant's integer value ever has to be spelled out here.
        match $val {
            $(
                raw if raw == TokenType::$variant as u32 => TokenType::$variant,
            )+
            // Not the discriminant of any listed variant.
            unhandled => panic!("unhandled value: {}", unhandled),
        }
    };
}
164
165impl TokenType {
166    fn from_u32(val: u32) -> TokenType {
167        let token_type = from_u32_match! { val;
168            Eq,
169            Lt,
170            Le,
171            EqEq,
172            Gt,
173            AndAnd,
174            OrOr,
175            Not,
176            Tilde,
177
178            Plus,
179            Minus,
180            Star,
181            And,
182            Or,
183
184            At,
185            Dot,
186            DotDot,
187            DotDotDot,
188            DotDotEq,
189            Comma,
190            Semi,
191            Colon,
192            PathSep,
193            RArrow,
194            FatArrow,
195            Pound,
196            Question,
197            OpenParen,
198            CloseParen,
199            OpenBrace,
200            CloseBrace,
201            OpenBracket,
202            CloseBracket,
203            Eof,
204
205            Operator,
206            Ident,
207            Lifetime,
208            Path,
209            Type,
210            Const,
211
212            KwAs,
213            KwAsync,
214            KwAuto,
215            KwAwait,
216            KwBecome,
217            KwBox,
218            KwBreak,
219            KwCatch,
220            KwConst,
221            KwContinue,
222            KwContractEnsures,
223            KwContractRequires,
224            KwCrate,
225            KwDefault,
226            KwDyn,
227            KwElse,
228            KwEnum,
229            KwExtern,
230            KwFn,
231            KwFor,
232            KwGen,
233            KwIf,
234            KwImpl,
235            KwIn,
236            KwLet,
237            KwLoop,
238            KwMacro,
239            KwMacroRules,
240            KwMatch,
241            KwMod,
242            KwMove,
243            KwMut,
244            KwPub,
245            KwRaw,
246            KwRef,
247            KwReturn,
248            KwReuse,
249            KwSafe,
250            KwSelfUpper,
251            KwStatic,
252            KwStruct,
253            KwTrait,
254            KwTry,
255            KwType,
256            KwUnderscore,
257            KwUnsafe,
258            KwUse,
259            KwWhere,
260            KwWhile,
261            KwYield,
262
263            SymAttSyntax,
264            SymClobberAbi,
265            SymInlateout,
266            SymInout,
267            SymIs,
268            SymLabel,
269            SymLateout,
270            SymMayUnwind,
271            SymNomem,
272            SymNoreturn,
273            SymNostack,
274            SymOptions,
275            SymOut,
276            SymPreservesFlags,
277            SymPure,
278            SymReadonly,
279            SymSym,
280        };
281        token_type
282    }
283
284    pub(super) fn is_keyword(&self) -> Option<Symbol> {
285        match self {
286            TokenType::KwAs => Some(kw::As),
287            TokenType::KwAsync => Some(kw::Async),
288            TokenType::KwAuto => Some(kw::Auto),
289            TokenType::KwAwait => Some(kw::Await),
290            TokenType::KwBecome => Some(kw::Become),
291            TokenType::KwBox => Some(kw::Box),
292            TokenType::KwBreak => Some(kw::Break),
293            TokenType::KwCatch => Some(kw::Catch),
294            TokenType::KwConst => Some(kw::Const),
295            TokenType::KwContinue => Some(kw::Continue),
296            TokenType::KwContractEnsures => Some(kw::ContractEnsures),
297            TokenType::KwContractRequires => Some(kw::ContractRequires),
298            TokenType::KwCrate => Some(kw::Crate),
299            TokenType::KwDefault => Some(kw::Default),
300            TokenType::KwDyn => Some(kw::Dyn),
301            TokenType::KwElse => Some(kw::Else),
302            TokenType::KwEnum => Some(kw::Enum),
303            TokenType::KwExtern => Some(kw::Extern),
304            TokenType::KwFn => Some(kw::Fn),
305            TokenType::KwFor => Some(kw::For),
306            TokenType::KwGen => Some(kw::Gen),
307            TokenType::KwIf => Some(kw::If),
308            TokenType::KwImpl => Some(kw::Impl),
309            TokenType::KwIn => Some(kw::In),
310            TokenType::KwLet => Some(kw::Let),
311            TokenType::KwLoop => Some(kw::Loop),
312            TokenType::KwMacroRules => Some(kw::MacroRules),
313            TokenType::KwMacro => Some(kw::Macro),
314            TokenType::KwMatch => Some(kw::Match),
315            TokenType::KwMod => Some(kw::Mod),
316            TokenType::KwMove => Some(kw::Move),
317            TokenType::KwMut => Some(kw::Mut),
318            TokenType::KwPub => Some(kw::Pub),
319            TokenType::KwRaw => Some(kw::Raw),
320            TokenType::KwRef => Some(kw::Ref),
321            TokenType::KwReturn => Some(kw::Return),
322            TokenType::KwReuse => Some(kw::Reuse),
323            TokenType::KwSafe => Some(kw::Safe),
324            TokenType::KwSelfUpper => Some(kw::SelfUpper),
325            TokenType::KwStatic => Some(kw::Static),
326            TokenType::KwStruct => Some(kw::Struct),
327            TokenType::KwTrait => Some(kw::Trait),
328            TokenType::KwTry => Some(kw::Try),
329            TokenType::KwType => Some(kw::Type),
330            TokenType::KwUnderscore => Some(kw::Underscore),
331            TokenType::KwUnsafe => Some(kw::Unsafe),
332            TokenType::KwUse => Some(kw::Use),
333            TokenType::KwWhere => Some(kw::Where),
334            TokenType::KwWhile => Some(kw::While),
335            TokenType::KwYield => Some(kw::Yield),
336
337            TokenType::SymAttSyntax => Some(sym::att_syntax),
338            TokenType::SymClobberAbi => Some(sym::clobber_abi),
339            TokenType::SymInlateout => Some(sym::inlateout),
340            TokenType::SymInout => Some(sym::inout),
341            TokenType::SymIs => Some(sym::is),
342            TokenType::SymLabel => Some(sym::label),
343            TokenType::SymLateout => Some(sym::lateout),
344            TokenType::SymMayUnwind => Some(sym::may_unwind),
345            TokenType::SymNomem => Some(sym::nomem),
346            TokenType::SymNoreturn => Some(sym::noreturn),
347            TokenType::SymNostack => Some(sym::nostack),
348            TokenType::SymOptions => Some(sym::options),
349            TokenType::SymOut => Some(sym::out),
350            TokenType::SymPreservesFlags => Some(sym::preserves_flags),
351            TokenType::SymPure => Some(sym::pure),
352            TokenType::SymReadonly => Some(sym::readonly),
353            TokenType::SymSym => Some(sym::sym),
354            _ => None,
355        }
356    }
357
358    // The output should be the same as that produced by
359    // `rustc_ast_pretty::pprust::token_to_string`.
360    pub(super) fn to_string(&self) -> String {
361        match self {
362            TokenType::Eq => "`=`",
363            TokenType::Lt => "`<`",
364            TokenType::Le => "`<=`",
365            TokenType::EqEq => "`==`",
366            TokenType::Gt => "`>`",
367            TokenType::AndAnd => "`&&`",
368            TokenType::OrOr => "`||`",
369            TokenType::Not => "`!`",
370            TokenType::Tilde => "`~`",
371
372            TokenType::Plus => "`+`",
373            TokenType::Minus => "`-`",
374            TokenType::Star => "`*`",
375            TokenType::And => "`&`",
376            TokenType::Or => "`|`",
377
378            TokenType::At => "`@`",
379            TokenType::Dot => "`.`",
380            TokenType::DotDot => "`..`",
381            TokenType::DotDotDot => "`...`",
382            TokenType::DotDotEq => "`..=`",
383            TokenType::Comma => "`,`",
384            TokenType::Semi => "`;`",
385            TokenType::Colon => "`:`",
386            TokenType::PathSep => "`::`",
387            TokenType::RArrow => "`->`",
388            TokenType::FatArrow => "`=>`",
389            TokenType::Pound => "`#`",
390            TokenType::Question => "`?`",
391            TokenType::OpenParen => "`(`",
392            TokenType::CloseParen => "`)`",
393            TokenType::OpenBrace => "`{`",
394            TokenType::CloseBrace => "`}`",
395            TokenType::OpenBracket => "`[`",
396            TokenType::CloseBracket => "`]`",
397            TokenType::Eof => "<eof>",
398
399            TokenType::Operator => "an operator",
400            TokenType::Ident => "identifier",
401            TokenType::Lifetime => "lifetime",
402            TokenType::Path => "path",
403            TokenType::Type => "type",
404            TokenType::Const => "a const expression",
405
406            _ => return format!("`{}`", self.is_keyword().unwrap()),
407        }
408        .to_string()
409    }
410}
411
/// Used by various `Parser` methods such as `check` and `eat`. The first field
/// is always used by those methods. The second field is only used when the
/// first field doesn't match.
#[derive(Clone, Copy, Debug)]
pub struct ExpTokenPair<'a> {
    /// The token kind; always used by the methods taking this pair.
    pub tok: &'a TokenKind,
    /// The `TokenType` counterpart of `tok`; only used when `tok` doesn't
    /// match.
    pub token_type: TokenType,
}
420
421/// Used by various `Parser` methods such as `check_keyword` and `eat_keyword`.
422/// The first field is always used by those methods. The second field is only
423/// used when the first field doesn't match.
424#[derive(Clone, Copy)]
425pub struct ExpKeywordPair {
426    pub kw: Symbol,
427    pub token_type: TokenType,
428}
429
// Gets a statically-known `ExpTokenPair` (for non-keywords) or
// `ExpKeywordPair` (for keywords and keyword-like symbols), as used with
// various `check`/`expect` methods in `Parser`.
//
// Invoke it with a single name, e.g. `exp!(Comma)` or `exp!(Fn)`. The rules
// starting with `@` are internal helpers that the named rules expand to.
//
// Note that `exp!(Const)` and `exp!(Type)` produce the *keyword* pairs
// (`TokenType::KwConst` / `TokenType::KwType`), not the "details elided"
// `TokenType::Const` / `TokenType::Type` variants.
//
// The name is short because it's used a lot.
#[macro_export]
// We don't use the normal `#[rustfmt::skip]` here because that triggers a
// bogus "macro-expanded `macro_export` macros from the current crate cannot be
// referred to by absolute paths" error, ugh. See #52234.
#[cfg_attr(rustfmt, rustfmt::skip)]
macro_rules! exp {
    // `ExpTokenPair` helper rules.

    // Plain token: `$tok` names both the `rustc_ast::token` item and the
    // `TokenType` variant.
    (@tok, $tok:ident) => {
        $crate::parser::token_type::ExpTokenPair {
            tok: &rustc_ast::token::$tok,
            token_type: $crate::parser::token_type::TokenType::$tok
        }
    };
    // Binary operator: `$op` names both the `BinOpToken` (wrapped in `BinOp`)
    // and the `TokenType` variant.
    (@binop, $op:ident) => {
        $crate::parser::token_type::ExpTokenPair {
            tok: &rustc_ast::token::BinOp(rustc_ast::token::BinOpToken::$op),
            token_type: $crate::parser::token_type::TokenType::$op,
        }
    };
    // Opening delimiter: `$delim` is the `Delimiter`, `$token_type` the
    // matching `TokenType` variant.
    (@open, $delim:ident, $token_type:ident) => {
        $crate::parser::token_type::ExpTokenPair {
            tok: &rustc_ast::token::OpenDelim(rustc_ast::token::Delimiter::$delim),
            token_type: $crate::parser::token_type::TokenType::$token_type,
        }
    };
    // Closing delimiter: same shape as `@open`, but with `CloseDelim`.
    (@close, $delim:ident, $token_type:ident) => {
        $crate::parser::token_type::ExpTokenPair {
            tok: &rustc_ast::token::CloseDelim(rustc_ast::token::Delimiter::$delim),
            token_type: $crate::parser::token_type::TokenType::$token_type,
        }
    };

    // `ExpKeywordPair` helper rules.

    // Keyword: `$kw` is looked up in `rustc_span::symbol::kw`.
    (@kw, $kw:ident, $token_type:ident) => {
        $crate::parser::token_type::ExpKeywordPair {
            kw: rustc_span::symbol::kw::$kw,
            token_type: $crate::parser::token_type::TokenType::$token_type,
        }
    };
    // Keyword-like symbol: `$kw` is looked up in `rustc_span::symbol::sym`.
    (@sym, $kw:ident, $token_type:ident) => {
        $crate::parser::token_type::ExpKeywordPair {
            kw: rustc_span::symbol::sym::$kw,
            token_type: $crate::parser::token_type::TokenType::$token_type,
        }
    };

    // Plain tokens.
    (Eq)             => { exp!(@tok, Eq) };
    (Lt)             => { exp!(@tok, Lt) };
    (Le)             => { exp!(@tok, Le) };
    (EqEq)           => { exp!(@tok, EqEq) };
    (Gt)             => { exp!(@tok, Gt) };
    (AndAnd)         => { exp!(@tok, AndAnd) };
    (OrOr)           => { exp!(@tok, OrOr) };
    (Not)            => { exp!(@tok, Not) };
    (Tilde)          => { exp!(@tok, Tilde) };
    (At)             => { exp!(@tok, At) };
    (Dot)            => { exp!(@tok, Dot) };
    (DotDot)         => { exp!(@tok, DotDot) };
    (DotDotDot)      => { exp!(@tok, DotDotDot) };
    (DotDotEq)       => { exp!(@tok, DotDotEq) };
    (Comma)          => { exp!(@tok, Comma) };
    (Semi)           => { exp!(@tok, Semi) };
    (Colon)          => { exp!(@tok, Colon) };
    (PathSep)        => { exp!(@tok, PathSep) };
    (RArrow)         => { exp!(@tok, RArrow) };
    (FatArrow)       => { exp!(@tok, FatArrow) };
    (Pound)          => { exp!(@tok, Pound) };
    (Question)       => { exp!(@tok, Question) };
    (Eof)            => { exp!(@tok, Eof) };

    // Binary operators.
    (Plus)           => { exp!(@binop, Plus) };
    (Minus)          => { exp!(@binop, Minus) };
    (Star)           => { exp!(@binop, Star) };
    (And)            => { exp!(@binop, And) };
    (Or)             => { exp!(@binop, Or) };

    // Delimiters.
    (OpenParen)      => { exp!(@open,  Parenthesis, OpenParen) };
    (OpenBrace)      => { exp!(@open,  Brace,       OpenBrace) };
    (OpenBracket)    => { exp!(@open,  Bracket,     OpenBracket) };
    (CloseParen)     => { exp!(@close, Parenthesis, CloseParen) };
    (CloseBrace)     => { exp!(@close, Brace,       CloseBrace) };
    (CloseBracket)   => { exp!(@close, Bracket,     CloseBracket) };

    // Keywords.
    (As)             => { exp!(@kw, As,         KwAs) };
    (Async)          => { exp!(@kw, Async,      KwAsync) };
    (Auto)           => { exp!(@kw, Auto,       KwAuto) };
    (Await)          => { exp!(@kw, Await,      KwAwait) };
    (Become)         => { exp!(@kw, Become,     KwBecome) };
    (Box)            => { exp!(@kw, Box,        KwBox) };
    (Break)          => { exp!(@kw, Break,      KwBreak) };
    (Catch)          => { exp!(@kw, Catch,      KwCatch) };
    (Const)          => { exp!(@kw, Const,      KwConst) };
    (Continue)       => { exp!(@kw, Continue,   KwContinue) };
    (ContractEnsures)  => { exp!(@kw, ContractEnsures, KwContractEnsures) };
    (ContractRequires) => { exp!(@kw, ContractRequires, KwContractRequires) };
    (Crate)          => { exp!(@kw, Crate,      KwCrate) };
    (Default)        => { exp!(@kw, Default,    KwDefault) };
    (Dyn)            => { exp!(@kw, Dyn,        KwDyn) };
    (Else)           => { exp!(@kw, Else,       KwElse) };
    (Enum)           => { exp!(@kw, Enum,       KwEnum) };
    (Extern)         => { exp!(@kw, Extern,     KwExtern) };
    (Fn)             => { exp!(@kw, Fn,         KwFn) };
    (For)            => { exp!(@kw, For,        KwFor) };
    (Gen)            => { exp!(@kw, Gen,        KwGen) };
    (If)             => { exp!(@kw, If,         KwIf) };
    (Impl)           => { exp!(@kw, Impl,       KwImpl) };
    (In)             => { exp!(@kw, In,         KwIn) };
    (Let)            => { exp!(@kw, Let,        KwLet) };
    (Loop)           => { exp!(@kw, Loop,       KwLoop) };
    (Macro)          => { exp!(@kw, Macro,      KwMacro) };
    (MacroRules)     => { exp!(@kw, MacroRules, KwMacroRules) };
    (Match)          => { exp!(@kw, Match,      KwMatch) };
    (Mod)            => { exp!(@kw, Mod,        KwMod) };
    (Move)           => { exp!(@kw, Move,       KwMove) };
    (Mut)            => { exp!(@kw, Mut,        KwMut) };
    (Pub)            => { exp!(@kw, Pub,        KwPub) };
    (Raw)            => { exp!(@kw, Raw,        KwRaw) };
    (Ref)            => { exp!(@kw, Ref,        KwRef) };
    (Return)         => { exp!(@kw, Return,     KwReturn) };
    (Reuse)          => { exp!(@kw, Reuse,      KwReuse) };
    (Safe)           => { exp!(@kw, Safe,       KwSafe) };
    (SelfUpper)      => { exp!(@kw, SelfUpper,  KwSelfUpper) };
    (Static)         => { exp!(@kw, Static,     KwStatic) };
    (Struct)         => { exp!(@kw, Struct,     KwStruct) };
    (Trait)          => { exp!(@kw, Trait,      KwTrait) };
    (Try)            => { exp!(@kw, Try,        KwTry) };
    (Type)           => { exp!(@kw, Type,       KwType) };
    (Underscore)     => { exp!(@kw, Underscore, KwUnderscore) };
    (Unsafe)         => { exp!(@kw, Unsafe,     KwUnsafe) };
    (Use)            => { exp!(@kw, Use,        KwUse) };
    (Where)          => { exp!(@kw, Where,      KwWhere) };
    (While)          => { exp!(@kw, While,      KwWhile) };
    (Yield)          => { exp!(@kw, Yield,      KwYield) };

    // Keyword-like symbols.
    (AttSyntax)      => { exp!(@sym, att_syntax,      SymAttSyntax) };
    (ClobberAbi)     => { exp!(@sym, clobber_abi,     SymClobberAbi) };
    (Inlateout)      => { exp!(@sym, inlateout,       SymInlateout) };
    (Inout)          => { exp!(@sym, inout,           SymInout) };
    (Is)             => { exp!(@sym, is,              SymIs) };
    (Label)          => { exp!(@sym, label,           SymLabel) };
    (Lateout)        => { exp!(@sym, lateout,         SymLateout) };
    (MayUnwind)      => { exp!(@sym, may_unwind,      SymMayUnwind) };
    (Nomem)          => { exp!(@sym, nomem,           SymNomem) };
    (Noreturn)       => { exp!(@sym, noreturn,        SymNoreturn) };
    (Nostack)        => { exp!(@sym, nostack,         SymNostack) };
    (Options)        => { exp!(@sym, options,         SymOptions) };
    (Out)            => { exp!(@sym, out,             SymOut) };
    (PreservesFlags) => { exp!(@sym, preserves_flags, SymPreservesFlags) };
    (Pure)           => { exp!(@sym, pure,            SymPure) };
    (Readonly)       => { exp!(@sym, readonly,        SymReadonly) };
    (Sym)            => { exp!(@sym, sym,             SymSym) };
}
587
588/// A bitset type designed specifically for `Parser::expected_token_types`,
589/// which is very hot. `u128` is the smallest integer that will fit every
590/// `TokenType` value.
591#[derive(Clone, Copy)]
592pub(super) struct TokenTypeSet(u128);
593
594impl TokenTypeSet {
595    pub(super) fn new() -> TokenTypeSet {
596        TokenTypeSet(0)
597    }
598
599    pub(super) fn is_empty(&self) -> bool {
600        self.0 == 0
601    }
602
603    pub(super) fn insert(&mut self, token_type: TokenType) {
604        self.0 = self.0 | (1u128 << token_type as u32)
605    }
606
607    pub(super) fn clear(&mut self) {
608        self.0 = 0
609    }
610
611    pub(super) fn contains(&self, token_type: TokenType) -> bool {
612        self.0 & (1u128 << token_type as u32) != 0
613    }
614
615    pub(super) fn iter(&self) -> TokenTypeSetIter {
616        TokenTypeSetIter(*self)
617    }
618}
619
620// The `TokenTypeSet` is a copy of the set being iterated. It initially holds
621// the entire set. Each bit is cleared as it is returned. We have finished once
622// it is all zeroes.
623pub(super) struct TokenTypeSetIter(TokenTypeSet);
624
625impl Iterator for TokenTypeSetIter {
626    type Item = TokenType;
627
628    fn next(&mut self) -> Option<TokenType> {
629        let num_bits: u32 = (std::mem::size_of_val(&self.0.0) * 8) as u32;
630        assert_eq!(num_bits, 128);
631        let z = self.0.0.trailing_zeros();
632        if z == num_bits {
633            None
634        } else {
635            self.0.0 &= !(1 << z); // clear the trailing 1 bit
636            Some(TokenType::from_u32(z))
637        }
638    }
639}