rustc_parse/parser/
token_type.rs

1use rustc_ast::token::TokenKind;
2use rustc_span::symbol::{Symbol, kw, sym};
3
/// The kinds of token the parser can name in "expected"/"expected one of"
/// error messages. Variants are added here as they become necessary. Tokens
/// that carry a value are collapsed into a single variant (e.g. every
/// identifier is represented by `Ident`).
///
/// The representation is awkward but deliberate. This is a C-style,
/// parameterless enum so that `TokenTypeSet` can be a bitset, which matters
/// because `Parser::expected_token_types` is very hot. `TokenType` used to
/// have variants with parameters (e.g. all the keywords lived in one
/// `Keyword` variant holding a `Symbol`) and `Parser::expected_token_types`
/// was a `Vec<TokenType>`, which was much slower to manipulate.
///
/// Keep the number of variants at 128 or fewer so that `TokenTypeSet` can be
/// implemented with a `u128`.
///
/// Each variant's discriminant is its bit position in `TokenTypeSet`, so the
/// declaration order below is significant. A new variant must also be added
/// to `TokenType::from_u32`, to `TokenType::to_string` or
/// `TokenType::is_keyword`, and to the `exp!` macro.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum TokenType {
    // Symbols that act as expression operators.
    Eq,
    Lt,
    Le,
    EqEq,
    Gt,
    AndAnd,
    OrOr,
    Bang,
    Tilde,

    // Binary operators.
    Plus,
    Minus,
    Star,
    And,
    Or,

    // Symbols that give the source its structure.
    At,
    Dot,
    DotDot,
    DotDotDot,
    DotDotEq,
    Comma,
    Semi,
    Colon,
    PathSep,
    RArrow,
    FatArrow,
    Pound,
    Question,
    OpenParen,
    CloseParen,
    OpenBrace,
    CloseBrace,
    OpenBracket,
    CloseBracket,
    Eof,

    // Whole classes of token, with the specifics left out.
    /// Any operator.
    Operator,
    /// Any identifier token.
    Ident,
    /// Any lifetime token.
    Lifetime,
    /// Any token that can start a path.
    Path,
    /// Any token that can start a type.
    Type,
    /// Any token that can start a const expression.
    Const,

    // Keywords, one variant each.
    // tidy-alphabetical-start
    KwAs,
    KwAsync,
    KwAuto,
    KwAwait,
    KwBecome,
    KwBox,
    KwBreak,
    KwCatch,
    KwConst,
    KwContinue,
    KwContractEnsures,
    KwContractRequires,
    KwCrate,
    KwDefault,
    KwDyn,
    KwElse,
    KwEnum,
    KwExtern,
    KwFn,
    KwFor,
    KwGen,
    KwIf,
    KwImpl,
    KwIn,
    KwLet,
    KwLoop,
    KwMacro,
    KwMacroRules,
    KwMatch,
    KwMod,
    KwMove,
    KwMut,
    KwPub,
    KwRaw,
    KwRef,
    KwReturn,
    KwReuse,
    KwSafe,
    KwSelfUpper,
    KwStatic,
    KwStruct,
    KwSuper,
    KwTrait,
    KwTry,
    KwType,
    KwUnderscore,
    KwUnsafe,
    KwUse,
    KwWhere,
    KwWhile,
    KwYield,
    // tidy-alphabetical-end

    // Symbols that are treated like keywords, one variant each.
    // tidy-alphabetical-start
    SymAttSyntax,
    SymClobberAbi,
    SymInlateout,
    SymInout,
    SymIs,
    SymLabel,
    SymLateout,
    SymMayUnwind,
    SymNomem,
    SymNoreturn,
    SymNostack,
    SymNull,
    SymOptions,
    SymOut,
    SymPin,
    SymPreservesFlags,
    SymPure,
    SymReadonly,
    SymSym,
    // tidy-alphabetical-end
}
152
// Generates the body of a discriminant-to-`TokenType` conversion from a plain
// list of variant names, so the list is the only thing written out by hand.
macro_rules! from_u32_match {
    ($val:ident; $($tok:ident,)+) => {
        // Each variant is compared against its own discriminant
        // (`TokenType::X as u32`). Writing literal integers (`0` for
        // `TokenType::Eq`, and so on) would be more obvious, but this way no
        // variant needs a specific integer spelled out.
        $(
            if $val == TokenType::$tok as u32 {
                TokenType::$tok
            } else
        )+
        {
            panic!("unhandled value: {}", $val)
        }
    };
}
167
168impl TokenType {
169    fn from_u32(val: u32) -> TokenType {
170        let token_type = from_u32_match! { val;
171            Eq,
172            Lt,
173            Le,
174            EqEq,
175            Gt,
176            AndAnd,
177            OrOr,
178            Bang,
179            Tilde,
180
181            Plus,
182            Minus,
183            Star,
184            And,
185            Or,
186
187            At,
188            Dot,
189            DotDot,
190            DotDotDot,
191            DotDotEq,
192            Comma,
193            Semi,
194            Colon,
195            PathSep,
196            RArrow,
197            FatArrow,
198            Pound,
199            Question,
200            OpenParen,
201            CloseParen,
202            OpenBrace,
203            CloseBrace,
204            OpenBracket,
205            CloseBracket,
206            Eof,
207
208            Operator,
209            Ident,
210            Lifetime,
211            Path,
212            Type,
213            Const,
214
215            KwAs,
216            KwAsync,
217            KwAuto,
218            KwAwait,
219            KwBecome,
220            KwBox,
221            KwBreak,
222            KwCatch,
223            KwConst,
224            KwContinue,
225            KwContractEnsures,
226            KwContractRequires,
227            KwCrate,
228            KwDefault,
229            KwDyn,
230            KwElse,
231            KwEnum,
232            KwExtern,
233            KwFn,
234            KwFor,
235            KwGen,
236            KwIf,
237            KwImpl,
238            KwIn,
239            KwLet,
240            KwLoop,
241            KwMacro,
242            KwMacroRules,
243            KwMatch,
244            KwMod,
245            KwMove,
246            KwMut,
247            KwPub,
248            KwRaw,
249            KwRef,
250            KwReturn,
251            KwReuse,
252            KwSafe,
253            KwSelfUpper,
254            KwStatic,
255            KwStruct,
256            KwSuper,
257            KwTrait,
258            KwTry,
259            KwType,
260            KwUnderscore,
261            KwUnsafe,
262            KwUse,
263            KwWhere,
264            KwWhile,
265            KwYield,
266
267            SymAttSyntax,
268            SymClobberAbi,
269            SymInlateout,
270            SymInout,
271            SymIs,
272            SymLabel,
273            SymLateout,
274            SymMayUnwind,
275            SymNomem,
276            SymNoreturn,
277            SymNostack,
278            SymNull,
279            SymOptions,
280            SymOut,
281            SymPreservesFlags,
282            SymPure,
283            SymReadonly,
284            SymSym,
285        };
286        token_type
287    }
288
289    pub(super) fn is_keyword(&self) -> Option<Symbol> {
290        match self {
291            TokenType::KwAs => Some(kw::As),
292            TokenType::KwAsync => Some(kw::Async),
293            TokenType::KwAuto => Some(kw::Auto),
294            TokenType::KwAwait => Some(kw::Await),
295            TokenType::KwBecome => Some(kw::Become),
296            TokenType::KwBox => Some(kw::Box),
297            TokenType::KwBreak => Some(kw::Break),
298            TokenType::KwCatch => Some(kw::Catch),
299            TokenType::KwConst => Some(kw::Const),
300            TokenType::KwContinue => Some(kw::Continue),
301            TokenType::KwContractEnsures => Some(kw::ContractEnsures),
302            TokenType::KwContractRequires => Some(kw::ContractRequires),
303            TokenType::KwCrate => Some(kw::Crate),
304            TokenType::KwDefault => Some(kw::Default),
305            TokenType::KwDyn => Some(kw::Dyn),
306            TokenType::KwElse => Some(kw::Else),
307            TokenType::KwEnum => Some(kw::Enum),
308            TokenType::KwExtern => Some(kw::Extern),
309            TokenType::KwFn => Some(kw::Fn),
310            TokenType::KwFor => Some(kw::For),
311            TokenType::KwGen => Some(kw::Gen),
312            TokenType::KwIf => Some(kw::If),
313            TokenType::KwImpl => Some(kw::Impl),
314            TokenType::KwIn => Some(kw::In),
315            TokenType::KwLet => Some(kw::Let),
316            TokenType::KwLoop => Some(kw::Loop),
317            TokenType::KwMacroRules => Some(kw::MacroRules),
318            TokenType::KwMacro => Some(kw::Macro),
319            TokenType::KwMatch => Some(kw::Match),
320            TokenType::KwMod => Some(kw::Mod),
321            TokenType::KwMove => Some(kw::Move),
322            TokenType::KwMut => Some(kw::Mut),
323            TokenType::KwPub => Some(kw::Pub),
324            TokenType::KwRaw => Some(kw::Raw),
325            TokenType::KwRef => Some(kw::Ref),
326            TokenType::KwReturn => Some(kw::Return),
327            TokenType::KwReuse => Some(kw::Reuse),
328            TokenType::KwSafe => Some(kw::Safe),
329            TokenType::KwSelfUpper => Some(kw::SelfUpper),
330            TokenType::KwStatic => Some(kw::Static),
331            TokenType::KwStruct => Some(kw::Struct),
332            TokenType::KwSuper => Some(kw::Super),
333            TokenType::KwTrait => Some(kw::Trait),
334            TokenType::KwTry => Some(kw::Try),
335            TokenType::KwType => Some(kw::Type),
336            TokenType::KwUnderscore => Some(kw::Underscore),
337            TokenType::KwUnsafe => Some(kw::Unsafe),
338            TokenType::KwUse => Some(kw::Use),
339            TokenType::KwWhere => Some(kw::Where),
340            TokenType::KwWhile => Some(kw::While),
341            TokenType::KwYield => Some(kw::Yield),
342
343            TokenType::SymAttSyntax => Some(sym::att_syntax),
344            TokenType::SymClobberAbi => Some(sym::clobber_abi),
345            TokenType::SymInlateout => Some(sym::inlateout),
346            TokenType::SymInout => Some(sym::inout),
347            TokenType::SymIs => Some(sym::is),
348            TokenType::SymLabel => Some(sym::label),
349            TokenType::SymLateout => Some(sym::lateout),
350            TokenType::SymMayUnwind => Some(sym::may_unwind),
351            TokenType::SymNomem => Some(sym::nomem),
352            TokenType::SymNoreturn => Some(sym::noreturn),
353            TokenType::SymNostack => Some(sym::nostack),
354            TokenType::SymNull => Some(sym::null),
355            TokenType::SymOptions => Some(sym::options),
356            TokenType::SymOut => Some(sym::out),
357            TokenType::SymPreservesFlags => Some(sym::preserves_flags),
358            TokenType::SymPure => Some(sym::pure),
359            TokenType::SymReadonly => Some(sym::readonly),
360            TokenType::SymSym => Some(sym::sym),
361            _ => None,
362        }
363    }
364
365    // The output should be the same as that produced by
366    // `rustc_ast_pretty::pprust::token_to_string`.
367    pub(super) fn to_string(&self) -> String {
368        match self {
369            TokenType::Eq => "`=`",
370            TokenType::Lt => "`<`",
371            TokenType::Le => "`<=`",
372            TokenType::EqEq => "`==`",
373            TokenType::Gt => "`>`",
374            TokenType::AndAnd => "`&&`",
375            TokenType::OrOr => "`||`",
376            TokenType::Bang => "`!`",
377            TokenType::Tilde => "`~`",
378
379            TokenType::Plus => "`+`",
380            TokenType::Minus => "`-`",
381            TokenType::Star => "`*`",
382            TokenType::And => "`&`",
383            TokenType::Or => "`|`",
384
385            TokenType::At => "`@`",
386            TokenType::Dot => "`.`",
387            TokenType::DotDot => "`..`",
388            TokenType::DotDotDot => "`...`",
389            TokenType::DotDotEq => "`..=`",
390            TokenType::Comma => "`,`",
391            TokenType::Semi => "`;`",
392            TokenType::Colon => "`:`",
393            TokenType::PathSep => "`::`",
394            TokenType::RArrow => "`->`",
395            TokenType::FatArrow => "`=>`",
396            TokenType::Pound => "`#`",
397            TokenType::Question => "`?`",
398            TokenType::OpenParen => "`(`",
399            TokenType::CloseParen => "`)`",
400            TokenType::OpenBrace => "`{`",
401            TokenType::CloseBrace => "`}`",
402            TokenType::OpenBracket => "`[`",
403            TokenType::CloseBracket => "`]`",
404            TokenType::Eof => "<eof>",
405
406            TokenType::Operator => "an operator",
407            TokenType::Ident => "identifier",
408            TokenType::Lifetime => "lifetime",
409            TokenType::Path => "path",
410            TokenType::Type => "type",
411            TokenType::Const => "a const expression",
412
413            _ => return format!("`{}`", self.is_keyword().unwrap()),
414        }
415        .to_string()
416    }
417}
418
419/// Used by various `Parser` methods such as `check` and `eat`. The first field
420/// is always by used those methods. The second field is only used when the
421/// first field doesn't match.
422#[derive(Clone, Copy, Debug)]
423pub struct ExpTokenPair {
424    pub tok: TokenKind,
425    pub token_type: TokenType,
426}
427
428/// Used by various `Parser` methods such as `check_keyword` and `eat_keyword`.
429/// The first field is always used by those methods. The second field is only
430/// used when the first field doesn't match.
431#[derive(Clone, Copy)]
432pub struct ExpKeywordPair {
433    pub kw: Symbol,
434    pub token_type: TokenType,
435}
436
// Produces a statically-known `ExpTokenPair` (for non-keywords) or
// `ExpKeywordPair` (for keywords and keyword-like symbols), as used with the
// various `check`/`expect` methods in `Parser`.
//
// The name is short because the macro is used a lot.
#[macro_export]
// The normal `#[rustfmt::skip]` is not used here because it triggers a bogus
// "macro-expanded `macro_export` macros from the current crate cannot be
// referred to by absolute paths" error, ugh. See #52234.
#[cfg_attr(rustfmt, rustfmt::skip)]
macro_rules! exp {
    // Helper: an `ExpTokenPair` for a token whose `rustc_ast::token` name and
    // `TokenType` variant name are the same.
    (@tok, $name:ident) => {
        $crate::parser::token_type::ExpTokenPair {
            tok: rustc_ast::token::$name,
            token_type: $crate::parser::token_type::TokenType::$name,
        }
    };

    // Helper: an `ExpKeywordPair` whose symbol lives in `kw`.
    (@kw, $symbol:ident, $variant:ident) => {
        $crate::parser::token_type::ExpKeywordPair {
            kw: rustc_span::symbol::kw::$symbol,
            token_type: $crate::parser::token_type::TokenType::$variant,
        }
    };
    // Helper: an `ExpKeywordPair` whose symbol lives in `sym`.
    (@sym, $symbol:ident, $variant:ident) => {
        $crate::parser::token_type::ExpKeywordPair {
            kw: rustc_span::symbol::sym::$symbol,
            token_type: $crate::parser::token_type::TokenType::$variant,
        }
    };

    // Operators and punctuation.
    (Eq) => { exp!(@tok, Eq) };
    (Lt) => { exp!(@tok, Lt) };
    (Le) => { exp!(@tok, Le) };
    (EqEq) => { exp!(@tok, EqEq) };
    (Gt) => { exp!(@tok, Gt) };
    (AndAnd) => { exp!(@tok, AndAnd) };
    (OrOr) => { exp!(@tok, OrOr) };
    (Bang) => { exp!(@tok, Bang) };
    (Tilde) => { exp!(@tok, Tilde) };
    (Plus) => { exp!(@tok, Plus) };
    (Minus) => { exp!(@tok, Minus) };
    (Star) => { exp!(@tok, Star) };
    (And) => { exp!(@tok, And) };
    (Or) => { exp!(@tok, Or) };
    (At) => { exp!(@tok, At) };
    (Dot) => { exp!(@tok, Dot) };
    (DotDot) => { exp!(@tok, DotDot) };
    (DotDotDot) => { exp!(@tok, DotDotDot) };
    (DotDotEq) => { exp!(@tok, DotDotEq) };
    (Comma) => { exp!(@tok, Comma) };
    (Semi) => { exp!(@tok, Semi) };
    (Colon) => { exp!(@tok, Colon) };
    (PathSep) => { exp!(@tok, PathSep) };
    (RArrow) => { exp!(@tok, RArrow) };
    (FatArrow) => { exp!(@tok, FatArrow) };
    (Pound) => { exp!(@tok, Pound) };
    (Question) => { exp!(@tok, Question) };
    (Eof) => { exp!(@tok, Eof) };

    // Delimiters.
    (OpenParen) => { exp!(@tok, OpenParen) };
    (CloseParen) => { exp!(@tok, CloseParen) };
    (OpenBrace) => { exp!(@tok, OpenBrace) };
    (CloseBrace) => { exp!(@tok, CloseBrace) };
    (OpenBracket) => { exp!(@tok, OpenBracket) };
    (CloseBracket) => { exp!(@tok, CloseBracket) };

    // Keywords.
    (As) => { exp!(@kw, As, KwAs) };
    (Async) => { exp!(@kw, Async, KwAsync) };
    (Auto) => { exp!(@kw, Auto, KwAuto) };
    (Await) => { exp!(@kw, Await, KwAwait) };
    (Become) => { exp!(@kw, Become, KwBecome) };
    (Box) => { exp!(@kw, Box, KwBox) };
    (Break) => { exp!(@kw, Break, KwBreak) };
    (Catch) => { exp!(@kw, Catch, KwCatch) };
    (Const) => { exp!(@kw, Const, KwConst) };
    (Continue) => { exp!(@kw, Continue, KwContinue) };
    (ContractEnsures) => { exp!(@kw, ContractEnsures, KwContractEnsures) };
    (ContractRequires) => { exp!(@kw, ContractRequires, KwContractRequires) };
    (Crate) => { exp!(@kw, Crate, KwCrate) };
    (Default) => { exp!(@kw, Default, KwDefault) };
    (Dyn) => { exp!(@kw, Dyn, KwDyn) };
    (Else) => { exp!(@kw, Else, KwElse) };
    (Enum) => { exp!(@kw, Enum, KwEnum) };
    (Extern) => { exp!(@kw, Extern, KwExtern) };
    (Fn) => { exp!(@kw, Fn, KwFn) };
    (For) => { exp!(@kw, For, KwFor) };
    (Gen) => { exp!(@kw, Gen, KwGen) };
    (If) => { exp!(@kw, If, KwIf) };
    (Impl) => { exp!(@kw, Impl, KwImpl) };
    (In) => { exp!(@kw, In, KwIn) };
    (Let) => { exp!(@kw, Let, KwLet) };
    (Loop) => { exp!(@kw, Loop, KwLoop) };
    (Macro) => { exp!(@kw, Macro, KwMacro) };
    (MacroRules) => { exp!(@kw, MacroRules, KwMacroRules) };
    (Match) => { exp!(@kw, Match, KwMatch) };
    (Mod) => { exp!(@kw, Mod, KwMod) };
    (Move) => { exp!(@kw, Move, KwMove) };
    (Mut) => { exp!(@kw, Mut, KwMut) };
    (Pub) => { exp!(@kw, Pub, KwPub) };
    (Raw) => { exp!(@kw, Raw, KwRaw) };
    (Ref) => { exp!(@kw, Ref, KwRef) };
    (Return) => { exp!(@kw, Return, KwReturn) };
    (Reuse) => { exp!(@kw, Reuse, KwReuse) };
    (Safe) => { exp!(@kw, Safe, KwSafe) };
    (SelfUpper) => { exp!(@kw, SelfUpper, KwSelfUpper) };
    (Static) => { exp!(@kw, Static, KwStatic) };
    (Struct) => { exp!(@kw, Struct, KwStruct) };
    (Super) => { exp!(@kw, Super, KwSuper) };
    (Trait) => { exp!(@kw, Trait, KwTrait) };
    (Try) => { exp!(@kw, Try, KwTry) };
    (Type) => { exp!(@kw, Type, KwType) };
    (Underscore) => { exp!(@kw, Underscore, KwUnderscore) };
    (Unsafe) => { exp!(@kw, Unsafe, KwUnsafe) };
    (Use) => { exp!(@kw, Use, KwUse) };
    (Where) => { exp!(@kw, Where, KwWhere) };
    (While) => { exp!(@kw, While, KwWhile) };
    (Yield) => { exp!(@kw, Yield, KwYield) };

    // Keyword-like symbols.
    (AttSyntax) => { exp!(@sym, att_syntax, SymAttSyntax) };
    (ClobberAbi) => { exp!(@sym, clobber_abi, SymClobberAbi) };
    (Inlateout) => { exp!(@sym, inlateout, SymInlateout) };
    (Inout) => { exp!(@sym, inout, SymInout) };
    (Is) => { exp!(@sym, is, SymIs) };
    (Label) => { exp!(@sym, label, SymLabel) };
    (Lateout) => { exp!(@sym, lateout, SymLateout) };
    (MayUnwind) => { exp!(@sym, may_unwind, SymMayUnwind) };
    (Nomem) => { exp!(@sym, nomem, SymNomem) };
    (Noreturn) => { exp!(@sym, noreturn, SymNoreturn) };
    (Nostack) => { exp!(@sym, nostack, SymNostack) };
    (Null) => { exp!(@sym, null, SymNull) };
    (Options) => { exp!(@sym, options, SymOptions) };
    (Out) => { exp!(@sym, out, SymOut) };
    (Pin) => { exp!(@sym, pin, SymPin) };
    (PreservesFlags) => { exp!(@sym, preserves_flags, SymPreservesFlags) };
    (Pure) => { exp!(@sym, pure, SymPure) };
    (Readonly) => { exp!(@sym, readonly, SymReadonly) };
    (Sym) => { exp!(@sym, sym, SymSym) };
}
578
579/// A bitset type designed specifically for `Parser::expected_token_types`,
580/// which is very hot. `u128` is the smallest integer that will fit every
581/// `TokenType` value.
582#[derive(Clone, Copy)]
583pub(super) struct TokenTypeSet(u128);
584
585impl TokenTypeSet {
586    pub(super) fn new() -> TokenTypeSet {
587        TokenTypeSet(0)
588    }
589
590    pub(super) fn is_empty(&self) -> bool {
591        self.0 == 0
592    }
593
594    pub(super) fn insert(&mut self, token_type: TokenType) {
595        self.0 = self.0 | (1u128 << token_type as u32)
596    }
597
598    pub(super) fn clear(&mut self) {
599        self.0 = 0
600    }
601
602    pub(super) fn contains(&self, token_type: TokenType) -> bool {
603        self.0 & (1u128 << token_type as u32) != 0
604    }
605
606    pub(super) fn iter(&self) -> TokenTypeSetIter {
607        TokenTypeSetIter(*self)
608    }
609}
610
611// The `TokenTypeSet` is a copy of the set being iterated. It initially holds
612// the entire set. Each bit is cleared as it is returned. We have finished once
613// it is all zeroes.
614pub(super) struct TokenTypeSetIter(TokenTypeSet);
615
616impl Iterator for TokenTypeSetIter {
617    type Item = TokenType;
618
619    fn next(&mut self) -> Option<TokenType> {
620        let num_bits: u32 = (size_of_val(&self.0.0) * 8) as u32;
621        assert_eq!(num_bits, 128);
622        let z = self.0.0.trailing_zeros();
623        if z == num_bits {
624            None
625        } else {
626            self.0.0 &= !(1 << z); // clear the trailing 1 bit
627            Some(TokenType::from_u32(z))
628        }
629    }
630}