rustc_ast/tokenstream.rs

//! # Token Streams
//!
//! `TokenStream`s represent syntactic objects before they are converted into ASTs.
//! A `TokenStream` is, roughly speaking, a sequence of [`TokenTree`]s,
//! which are themselves a single [`Token`] or a `Delimited` subsequence of tokens.
//!
//! ## Ownership
//!
//! `TokenStream`s are persistent data structures constructed as ropes with
//! reference-counted children. In general, this means that calling an operation
//! on a `TokenStream` (such as `slice`) produces an entirely new `TokenStream`
//! from the borrowed reference to the original. This essentially coerces
//! `TokenStream`s into "views" of their subparts, and a borrowed `TokenStream`
//! is sufficient to build an owned `TokenStream` without taking ownership of
//! the original.
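//!
//! For example, building a new stream from a borrowed one is cheap. (An
//! illustrative sketch, not a compiled doctest; `DUMMY_SP` and `token::Comma`
//! stand in for real spans and tokens.)
//!
//! ```ignore (illustrative)
//! let stream = TokenStream::token_alone(token::Comma, DUMMY_SP);
//! // Collecting borrowed trees into a new stream leaves `stream` untouched;
//! // only reference counts change.
//! let view: TokenStream = stream.iter().cloned().collect();
//! assert_eq!(stream, view);
//! ```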

use std::borrow::Cow;
use std::ops::Range;
use std::sync::Arc;
use std::{cmp, fmt, iter, mem};

use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::sync;
use rustc_macros::{Decodable, Encodable, HashStable_Generic};
use rustc_serialize::{Decodable, Encodable};
use rustc_span::{DUMMY_SP, Span, SpanDecoder, SpanEncoder, Symbol, sym};
use thin_vec::ThinVec;

use crate::ast::AttrStyle;
use crate::ast_traits::{HasAttrs, HasTokens};
use crate::token::{self, Delimiter, Token, TokenKind};
use crate::{AttrVec, Attribute};

/// Part of a `TokenStream`.
#[derive(Debug, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
pub enum TokenTree {
    /// A single token. Should never be `OpenDelim` or `CloseDelim`, because
    /// delimiters are implicitly represented by `Delimited`.
    Token(Token, Spacing),
    /// A delimited sequence of token trees.
    Delimited(DelimSpan, DelimSpacing, Delimiter, TokenStream),
}

// Ensure all fields of `TokenTree` are `DynSend` and `DynSync`.
fn _dummy()
where
    Token: sync::DynSend + sync::DynSync,
    Spacing: sync::DynSend + sync::DynSync,
    DelimSpan: sync::DynSend + sync::DynSync,
    Delimiter: sync::DynSend + sync::DynSync,
    TokenStream: sync::DynSend + sync::DynSync,
{
}

impl TokenTree {
    /// Checks if this `TokenTree` is equal to the other, regardless of span/spacing information.
    pub fn eq_unspanned(&self, other: &TokenTree) -> bool {
        match (self, other) {
            (TokenTree::Token(token, _), TokenTree::Token(token2, _)) => token.kind == token2.kind,
            (TokenTree::Delimited(.., delim, tts), TokenTree::Delimited(.., delim2, tts2)) => {
                delim == delim2
                    && tts.len() == tts2.len()
                    && tts.iter().zip(tts2.iter()).all(|(a, b)| a.eq_unspanned(b))
            }
            _ => false,
        }
    }

    /// Retrieves the `TokenTree`'s span.
    pub fn span(&self) -> Span {
        match self {
            TokenTree::Token(token, _) => token.span,
            TokenTree::Delimited(sp, ..) => sp.entire(),
        }
    }

    /// Create a `TokenTree::Token` with alone spacing.
    pub fn token_alone(kind: TokenKind, span: Span) -> TokenTree {
        TokenTree::Token(Token::new(kind, span), Spacing::Alone)
    }

    /// Create a `TokenTree::Token` with joint spacing.
    pub fn token_joint(kind: TokenKind, span: Span) -> TokenTree {
        TokenTree::Token(Token::new(kind, span), Spacing::Joint)
    }

    /// Create a `TokenTree::Token` with joint-hidden spacing.
    pub fn token_joint_hidden(kind: TokenKind, span: Span) -> TokenTree {
        TokenTree::Token(Token::new(kind, span), Spacing::JointHidden)
    }

    pub fn uninterpolate(&self) -> Cow<'_, TokenTree> {
        match self {
            TokenTree::Token(token, spacing) => match token.uninterpolate() {
                Cow::Owned(token) => Cow::Owned(TokenTree::Token(token, *spacing)),
                Cow::Borrowed(_) => Cow::Borrowed(self),
            },
            _ => Cow::Borrowed(self),
        }
    }
}
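
// An illustrative sketch of the constructors above (placeholder spans; not
// something the parser itself would produce):
//
//     let semi = TokenTree::token_alone(token::Semi, DUMMY_SP);
//     let bang = TokenTree::token_joint(token::Bang, DUMMY_SP); // `!` that may glue
//
// `eq_unspanned` would consider two such `semi` trees equal even if they were
// built with different spans, since it compares token kinds only.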

impl<CTX> HashStable<CTX> for TokenStream
where
    CTX: crate::HashStableContext,
{
    fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
        for sub_tt in self.iter() {
            sub_tt.hash_stable(hcx, hasher);
        }
    }
}

/// A lazy version of [`AttrTokenStream`], which defers creation of an actual
/// `AttrTokenStream` until it is needed.
#[derive(Clone)]
pub struct LazyAttrTokenStream(Arc<LazyAttrTokenStreamInner>);

impl LazyAttrTokenStream {
    pub fn new_direct(stream: AttrTokenStream) -> LazyAttrTokenStream {
        LazyAttrTokenStream(Arc::new(LazyAttrTokenStreamInner::Direct(stream)))
    }

    pub fn new_pending(
        start_token: (Token, Spacing),
        cursor_snapshot: TokenCursor,
        num_calls: u32,
        break_last_token: u32,
        node_replacements: ThinVec<NodeReplacement>,
    ) -> LazyAttrTokenStream {
        LazyAttrTokenStream(Arc::new(LazyAttrTokenStreamInner::Pending {
            start_token,
            cursor_snapshot,
            num_calls,
            break_last_token,
            node_replacements,
        }))
    }

    pub fn to_attr_token_stream(&self) -> AttrTokenStream {
        self.0.to_attr_token_stream()
    }
}

impl fmt::Debug for LazyAttrTokenStream {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "LazyAttrTokenStream({:?})", self.to_attr_token_stream())
    }
}

impl<S: SpanEncoder> Encodable<S> for LazyAttrTokenStream {
    fn encode(&self, _s: &mut S) {
        panic!("Attempted to encode LazyAttrTokenStream");
    }
}

impl<D: SpanDecoder> Decodable<D> for LazyAttrTokenStream {
    fn decode(_d: &mut D) -> Self {
        panic!("Attempted to decode LazyAttrTokenStream");
    }
}

impl<CTX> HashStable<CTX> for LazyAttrTokenStream {
    fn hash_stable(&self, _hcx: &mut CTX, _hasher: &mut StableHasher) {
        panic!("Attempted to compute stable hash for LazyAttrTokenStream");
    }
}

/// A token range within a `Parser`'s full token stream.
#[derive(Clone, Debug)]
pub struct ParserRange(pub Range<u32>);

/// A token range within an individual AST node's (lazy) token stream, i.e.
/// relative to that node's first token. Distinct from `ParserRange` so the two
/// kinds of range can't be mixed up.
#[derive(Clone, Debug)]
pub struct NodeRange(pub Range<u32>);

/// Indicates a range of tokens that should be replaced by an `AttrsTarget`
/// (replacement) or be replaced by nothing (deletion). This is used in two
/// places during token collection.
///
/// 1. Replacement. During the parsing of an AST node that may have a
///    `#[derive]` attribute, when we parse a nested AST node that has `#[cfg]`
///    or `#[cfg_attr]`, we replace the entire inner AST node with
///    `FlatToken::AttrsTarget`. This lets us perform eager cfg-expansion on an
///    `AttrTokenStream`.
///
/// 2. Deletion. We delete inner attributes from all collected token streams,
///    and instead track them through the `attrs` field on the AST node. This
///    lets us manipulate them similarly to outer attributes. When we create a
///    `TokenStream`, the inner attributes are inserted into the proper place
///    in the token stream.
///
/// Each replacement starts off in `ParserReplacement` form but is converted to
/// `NodeReplacement` form when it is attached to a single AST node, via
/// `LazyAttrTokenStreamImpl`.
pub type ParserReplacement = (ParserRange, Option<AttrsTarget>);

/// See the comment on `ParserReplacement`.
pub type NodeReplacement = (NodeRange, Option<AttrsTarget>);

impl NodeRange {
    // Converts a range within a parser's tokens to a range within a
    // node's tokens beginning at `start_pos`.
    //
    // For example, imagine a parser with 50 tokens in its token stream, a
    // function that spans `ParserRange(20..40)` and an inner attribute within
    // that function that spans `ParserRange(30..35)`. We would find the inner
    // attribute's range within the function's tokens by subtracting 20, which
    // is the position of the function's start token. This gives
    // `NodeRange(10..15)`.
    pub fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange {
        assert!(!parser_range.is_empty());
        assert!(parser_range.start >= start_pos);
        NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos))
    }
}

enum LazyAttrTokenStreamInner {
    // The token stream has already been produced.
    Direct(AttrTokenStream),

    // From a value of this type we can reconstruct the `TokenStream` seen by
    // the `f` callback passed to a call to `Parser::collect_tokens`, by
    // replaying the getting of the tokens. This saves us producing a
    // `TokenStream` if it is never needed, e.g. a captured `macro_rules!`
    // argument that is never passed to a proc macro. In practice, token stream
    // creation happens rarely compared to calls to `collect_tokens` (see some
    // statistics in #78736) so we are doing as little up-front work as
    // possible.
    //
    // This also makes `Parser` very cheap to clone, since there is no
    // intermediate collection buffer to clone.
    Pending {
        start_token: (Token, Spacing),
        cursor_snapshot: TokenCursor,
        num_calls: u32,
        break_last_token: u32,
        node_replacements: ThinVec<NodeReplacement>,
    },
}

impl LazyAttrTokenStreamInner {
    fn to_attr_token_stream(&self) -> AttrTokenStream {
        match self {
            LazyAttrTokenStreamInner::Direct(stream) => stream.clone(),
            LazyAttrTokenStreamInner::Pending {
                start_token,
                cursor_snapshot,
                num_calls,
                break_last_token,
                node_replacements,
            } => {
                // The token produced by the final call to `{,inlined_}next` was not
                // actually consumed by the callback. The combination of chaining the
                // initial token and using `take` produces the desired result - we
                // produce an empty `TokenStream` if no calls were made, and omit the
                // final token otherwise.
                let mut cursor_snapshot = cursor_snapshot.clone();
                let tokens = iter::once(FlatToken::Token(*start_token))
                    .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next())))
                    .take(*num_calls as usize);

                if node_replacements.is_empty() {
                    make_attr_token_stream(tokens, *break_last_token)
                } else {
                    let mut tokens: Vec<_> = tokens.collect();
                    let mut node_replacements = node_replacements.to_vec();
                    node_replacements.sort_by_key(|(range, _)| range.0.start);

                    #[cfg(debug_assertions)]
                    for [(node_range, tokens), (next_node_range, next_tokens)] in
                        node_replacements.array_windows()
                    {
                        assert!(
                            node_range.0.end <= next_node_range.0.start
                                || node_range.0.end >= next_node_range.0.end,
                            "Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
                            node_range,
                            tokens,
                            next_node_range,
                            next_tokens,
                        );
                    }

                    // Process the replace ranges, starting from the highest start
                    // position and working our way back. If we have tokens like:
                    //
                    // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
                    //
                    // Then we will generate replace ranges for both
                    // the `#[cfg(FALSE)] field: bool` and the entire
                    // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
                    //
                    // By starting processing from the replace range with the greatest
                    // start position, we ensure that any (outer) replace range which
                    // encloses another (inner) replace range will fully overwrite the
                    // inner range's replacement.
                    for (node_range, target) in node_replacements.into_iter().rev() {
                        assert!(
                            !node_range.0.is_empty(),
                            "Cannot replace an empty node range: {:?}",
                            node_range.0
                        );

                        // Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s,
                        // plus enough `FlatToken::Empty`s to fill up the rest of the range. This
                        // keeps the total length of `tokens` constant throughout the replacement
                        // process, allowing us to do all replacements without adjusting indices.
                        let target_len = target.is_some() as usize;
                        tokens.splice(
                            (node_range.0.start as usize)..(node_range.0.end as usize),
                            target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain(
                                iter::repeat(FlatToken::Empty)
                                    .take(node_range.0.len() - target_len),
                            ),
                        );
                    }
                    make_attr_token_stream(tokens.into_iter(), *break_last_token)
                }
            }
        }
    }
}
/// A helper type used when building an `AttrTokenStream` from
/// a `LazyAttrTokenStream`. Both delimiter and non-delimiter tokens
/// are stored as `FlatToken::Token`. A vector of `FlatToken`s
/// is then 'parsed' to build up an `AttrTokenStream` with nested
/// `AttrTokenTree::Delimited` tokens.
#[derive(Debug, Clone)]
enum FlatToken {
    /// A token - this holds both delimiter (e.g. '{' and '}')
    /// and non-delimiter tokens.
    Token((Token, Spacing)),
    /// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted
    /// directly into the constructed `AttrTokenStream` as an
    /// `AttrTokenTree::AttrsTarget`.
    AttrsTarget(AttrsTarget),
    /// A special 'empty' token that is ignored during the conversion
    /// to an `AttrTokenStream`. This is used to simplify the
    /// handling of replace ranges.
    Empty,
}
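
// For example, the braced group `{ a }` appears in flattened form roughly as
// (token kinds abbreviated for illustration):
//
//     [Token(OpenBrace), Token(Ident("a")), Token(CloseBrace)]
//
// and `make_attr_token_stream` below re-nests it into a single
// `AttrTokenTree::Delimited` tree containing the `a` token.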

/// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
/// information about the tokens for attribute targets. This is used
/// during expansion to perform early cfg-expansion, and to process attributes
/// during proc-macro invocations.
#[derive(Clone, Debug, Default, Encodable, Decodable)]
pub struct AttrTokenStream(pub Arc<Vec<AttrTokenTree>>);

/// Converts a flattened iterator of tokens (including open and close delimiter tokens) into an
/// `AttrTokenStream`, creating an `AttrTokenTree::Delimited` for each matching pair of open and
/// close delims.
fn make_attr_token_stream(
    iter: impl Iterator<Item = FlatToken>,
    break_last_token: u32,
) -> AttrTokenStream {
    #[derive(Debug)]
    struct FrameData {
        // This is `None` for the first frame, `Some` for all others.
        open_delim_sp: Option<(Delimiter, Span, Spacing)>,
        inner: Vec<AttrTokenTree>,
    }
    // The stack always has at least one element. Storing it separately makes for shorter code.
    let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] };
    let mut stack_rest = vec![];
    for flat_token in iter {
        match flat_token {
            FlatToken::Token((token @ Token { kind, span }, spacing)) => {
                if let Some(delim) = kind.open_delim() {
                    stack_rest.push(mem::replace(
                        &mut stack_top,
                        FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] },
                    ));
                } else if let Some(delim) = kind.close_delim() {
                    let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap());
                    let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap();
                    assert!(
                        open_delim.eq_ignoring_invisible_origin(&delim),
                        "Mismatched open/close delims: open={open_delim:?} close={delim:?} at {span:?}"
                    );
                    let dspan = DelimSpan::from_pair(open_sp, span);
                    let dspacing = DelimSpacing::new(open_spacing, spacing);
                    let stream = AttrTokenStream::new(frame_data.inner);
                    let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream);
                    stack_top.inner.push(delimited);
                } else {
                    stack_top.inner.push(AttrTokenTree::Token(token, spacing))
                }
            }
            FlatToken::AttrsTarget(target) => {
                stack_top.inner.push(AttrTokenTree::AttrsTarget(target))
            }
            FlatToken::Empty => {}
        }
    }

    if break_last_token > 0 {
        let last_token = stack_top.inner.pop().unwrap();
        if let AttrTokenTree::Token(last_token, spacing) = last_token {
            let (unglued, _) = last_token.kind.break_two_token_op(break_last_token).unwrap();

            // Tokens are always ASCII chars, so we can use byte arithmetic here.
            let mut first_span = last_token.span.shrink_to_lo();
            first_span =
                first_span.with_hi(first_span.lo() + rustc_span::BytePos(break_last_token));

            stack_top.inner.push(AttrTokenTree::Token(Token::new(unglued, first_span), spacing));
        } else {
            panic!("Unexpected last token {last_token:?}")
        }
    }
    AttrTokenStream::new(stack_top.inner)
}

/// Like `TokenTree`, but for `AttrTokenStream`.
#[derive(Clone, Debug, Encodable, Decodable)]
pub enum AttrTokenTree {
    Token(Token, Spacing),
    Delimited(DelimSpan, DelimSpacing, Delimiter, AttrTokenStream),
    /// Stores the attributes for an attribute target,
    /// along with the tokens for that attribute target.
    /// See `AttrsTarget` for more information
    AttrsTarget(AttrsTarget),
}

impl AttrTokenStream {
    pub fn new(tokens: Vec<AttrTokenTree>) -> AttrTokenStream {
        AttrTokenStream(Arc::new(tokens))
    }

    /// Converts this `AttrTokenStream` to a plain `Vec<TokenTree>`. During
    /// conversion, any `AttrTokenTree::AttrsTarget` gets "flattened" back to a
    /// `TokenStream`, as described in the comment on
    /// `attrs_and_tokens_to_token_trees`.
    pub fn to_token_trees(&self) -> Vec<TokenTree> {
        let mut res = Vec::with_capacity(self.0.len());
        for tree in self.0.iter() {
            match tree {
                AttrTokenTree::Token(inner, spacing) => {
                    res.push(TokenTree::Token(inner.clone(), *spacing));
                }
                AttrTokenTree::Delimited(span, spacing, delim, stream) => {
                    res.push(TokenTree::Delimited(
                        *span,
                        *spacing,
                        *delim,
                        TokenStream::new(stream.to_token_trees()),
                    ))
                }
                AttrTokenTree::AttrsTarget(target) => {
                    attrs_and_tokens_to_token_trees(&target.attrs, &target.tokens, &mut res);
                }
            }
        }
        res
    }
}

// Converts multiple attributes and the tokens for a target AST node into token trees, and appends
// them to `res`.
//
// Example: if the AST node is "fn f() { blah(); }", then:
// - Simple if no attributes are present, e.g. "fn f() { blah(); }"
// - Simple if only outer attributes are present, e.g. "#[outer1] #[outer2] fn f() { blah(); }"
// - Trickier if inner attributes are present, because they must be moved within the AST node's
//   tokens, e.g. "#[outer] fn f() { #![inner] blah() }"
fn attrs_and_tokens_to_token_trees(
    attrs: &[Attribute],
    target_tokens: &LazyAttrTokenStream,
    res: &mut Vec<TokenTree>,
) {
    let idx = attrs.partition_point(|attr| matches!(attr.style, AttrStyle::Outer));
    let (outer_attrs, inner_attrs) = attrs.split_at(idx);

    // Add outer attribute tokens.
    for attr in outer_attrs {
        res.extend(attr.token_trees());
    }

    // Add target AST node tokens.
    res.extend(target_tokens.to_attr_token_stream().to_token_trees());

    // Insert inner attribute tokens.
    if !inner_attrs.is_empty() {
        let found = insert_inner_attrs(inner_attrs, res);
        assert!(found, "Failed to find trailing delimited group in: {res:?}");
    }

    // Inner attributes are only supported on blocks, functions, impls, and
    // modules. All of these have their inner attributes placed at the
    // beginning of the rightmost outermost braced group:
    // e.g. `fn foo() { #![my_attr] }`. (Note: the braces may be within
    // invisible delimiters.)
    //
    // Therefore, we can insert them back into the right location without
    // needing to do any extra position tracking.
    //
    // Note: Outline modules are an exception - they can have attributes like
    // `#![my_attr]` at the start of a file. Support for custom attributes in
    // this position is not properly implemented - we always synthesize fake
    // tokens, so we never reach this code.
    fn insert_inner_attrs(inner_attrs: &[Attribute], tts: &mut Vec<TokenTree>) -> bool {
        for tree in tts.iter_mut().rev() {
            if let TokenTree::Delimited(span, spacing, Delimiter::Brace, stream) = tree {
                // Found it: the rightmost, outermost braced group.
                let mut tts = vec![];
                for inner_attr in inner_attrs {
                    tts.extend(inner_attr.token_trees());
                }
                tts.extend(stream.0.iter().cloned());
                let stream = TokenStream::new(tts);
                *tree = TokenTree::Delimited(*span, *spacing, Delimiter::Brace, stream);
                return true;
            } else if let TokenTree::Delimited(span, spacing, Delimiter::Invisible(src), stream) =
                tree
            {
                // Recurse inside invisible delimiters.
                let mut vec: Vec<_> = stream.iter().cloned().collect();
                if insert_inner_attrs(inner_attrs, &mut vec) {
                    *tree = TokenTree::Delimited(
                        *span,
                        *spacing,
                        Delimiter::Invisible(*src),
                        TokenStream::new(vec),
                    );
                    return true;
                }
            }
        }
        false
    }
}

/// Stores the tokens for an attribute target, along
/// with its attributes.
///
/// This is constructed during parsing when we need to capture
/// tokens, for `cfg` and `cfg_attr` attributes.
///
/// For example, `#[cfg(FALSE)] struct Foo {}` would
/// have an `attrs` field containing the `#[cfg(FALSE)]` attr,
/// and a `tokens` field storing the (unparsed) tokens `struct Foo {}`
///
/// The `cfg`/`cfg_attr` processing occurs in
/// `StripUnconfigured::configure_tokens`.
#[derive(Clone, Debug, Encodable, Decodable)]
pub struct AttrsTarget {
    /// Attributes, both outer and inner.
    /// These are stored in the original order that they were parsed in.
    pub attrs: AttrVec,
    /// The underlying tokens for the attribute target that `attrs`
    /// are applied to
    pub tokens: LazyAttrTokenStream,
}

/// A `TokenStream` is an abstract sequence of tokens, organized into [`TokenTree`]s.
#[derive(Clone, Debug, Default, Encodable, Decodable)]
pub struct TokenStream(pub(crate) Arc<Vec<TokenTree>>);

/// Indicates whether a token can join with the following token to form a
/// compound token. Used for conversions to `proc_macro::Spacing`. Also used to
/// guide pretty-printing, which is where the `JointHidden` value (which isn't
/// part of `proc_macro::Spacing`) comes in useful.
#[derive(Clone, Copy, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
pub enum Spacing {
    /// The token cannot join with the following token to form a compound
    /// token.
    ///
    /// In token streams parsed from source code, the compiler will use `Alone`
    /// for any token immediately followed by whitespace, a non-doc comment, or
    /// EOF.
    ///
    /// When constructing token streams within the compiler, use this for each
    /// token that (a) should be pretty-printed with a space after it, or (b)
    /// is the last token in the stream. (In the latter case the choice of
    /// spacing doesn't matter because it is never used for the last token. We
    /// arbitrarily use `Alone`.)
    ///
    /// Converts to `proc_macro::Spacing::Alone`, and
    /// `proc_macro::Spacing::Alone` converts back to this.
    Alone,

    /// The token can join with the following token to form a compound token.
    ///
    /// In token streams parsed from source code, the compiler will use `Joint`
    /// for any token immediately followed by punctuation (as determined by
    /// `Token::is_punct`).
    ///
    /// When constructing token streams within the compiler, use this for each
    /// token that (a) should be pretty-printed without a space after it, and
    /// (b) is followed by a punctuation token.
    ///
    /// Converts to `proc_macro::Spacing::Joint`, and
    /// `proc_macro::Spacing::Joint` converts back to this.
    Joint,

    /// The token can join with the following token to form a compound token,
    /// but this will not be visible at the proc macro level. (This is what the
    /// `Hidden` means; see below.)
    ///
    /// In token streams parsed from source code, the compiler will use
    /// `JointHidden` for any token immediately followed by anything not
    /// covered by the `Alone` and `Joint` cases: an identifier, lifetime,
    /// literal, delimiter, doc comment.
    ///
    /// When constructing token streams, use this for each token that (a)
    /// should be pretty-printed without a space after it, and (b) is followed
    /// by a non-punctuation token.
    ///
    /// Converts to `proc_macro::Spacing::Alone`, but
    /// `proc_macro::Spacing::Alone` converts back to `token::Spacing::Alone`.
    /// Because of that, pretty-printing of `TokenStream`s produced by proc
    /// macros is unavoidably uglier (with more whitespace between tokens) than
    /// pretty-printing of `TokenStream`s produced by other means (i.e. parsed
    /// source code, internally constructed token streams, and token streams
    /// produced by declarative macros).
    JointHidden,
}
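
// For example (an illustrative sketch; the spans are placeholders), a macro
// re-emitting `+` and `=` as separate tokens marks the `+` as `Joint` so the
// pair can later be glued back into a single `+=`:
//
//     let plus = TokenTree::token_joint(token::Plus, DUMMY_SP); // may glue with `=`
//     let eq = TokenTree::token_alone(token::Eq, DUMMY_SP);     // ends the compound
//
// An `Alone` `+` followed by `=` stays two tokens and pretty-prints as `+ =`.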

impl TokenStream {
    /// Given a stream that (probably) represents two macro arguments that are
    /// missing a separating comma (e.g. `foo!(a b)`), returns a new
    /// `TokenStream` with a comma inserted between them, along with the span
    /// where the comma belongs, for use in diagnostic suggestions.
    pub fn add_comma(&self) -> Option<(TokenStream, Span)> {
        // Used to suggest if a user writes `foo!(a b);`
        let mut suggestion = None;
        let mut iter = self.0.iter().enumerate().peekable();
        while let Some((pos, ts)) = iter.next() {
            if let Some((_, next)) = iter.peek() {
                let sp = match (&ts, &next) {
                    (_, TokenTree::Token(Token { kind: token::Comma, .. }, _)) => continue,
                    (
                        TokenTree::Token(token_left, Spacing::Alone),
                        TokenTree::Token(token_right, _),
                    ) if ((token_left.is_ident() && !token_left.is_reserved_ident())
                        || token_left.is_lit())
                        && ((token_right.is_ident() && !token_right.is_reserved_ident())
                            || token_right.is_lit()) =>
                    {
                        token_left.span
                    }
                    (TokenTree::Delimited(sp, ..), _) => sp.entire(),
                    _ => continue,
                };
                let sp = sp.shrink_to_hi();
                let comma = TokenTree::token_alone(token::Comma, sp);
                suggestion = Some((pos, comma, sp));
            }
        }
        if let Some((pos, comma, sp)) = suggestion {
            let mut new_stream = Vec::with_capacity(self.0.len() + 1);
            let parts = self.0.split_at(pos + 1);
            new_stream.extend_from_slice(parts.0);
            new_stream.push(comma);
            new_stream.extend_from_slice(parts.1);
            return Some((TokenStream::new(new_stream), sp));
        }
        None
    }
}
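
// A sketch of how a diagnostic might use `add_comma` (`stream` is assumed to
// hold the tokens of `a b` from a `foo!(a b);` invocation):
//
//     if let Some((with_comma, comma_span)) = stream.add_comma() {
//         // Suggest rewriting the call as `foo!(a, b);`, pointing the
//         // suggestion at `comma_span`, just after `a`.
//     }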

impl FromIterator<TokenTree> for TokenStream {
    fn from_iter<I: IntoIterator<Item = TokenTree>>(iter: I) -> Self {
        TokenStream::new(iter.into_iter().collect::<Vec<TokenTree>>())
    }
}

impl Eq for TokenStream {}

impl PartialEq<TokenStream> for TokenStream {
    fn eq(&self, other: &TokenStream) -> bool {
        self.iter().eq(other.iter())
    }
}

impl TokenStream {
    pub fn new(tts: Vec<TokenTree>) -> TokenStream {
        TokenStream(Arc::new(tts))
    }

    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    pub fn len(&self) -> usize {
        self.0.len()
    }

    pub fn get(&self, index: usize) -> Option<&TokenTree> {
        self.0.get(index)
    }

    pub fn iter(&self) -> TokenStreamIter<'_> {
        TokenStreamIter::new(self)
    }

    /// Create a token stream containing a single token with alone spacing. The
    /// spacing used for the final token in a constructed stream doesn't matter
    /// because it's never used. In practice we arbitrarily use
    /// `Spacing::Alone`.
    pub fn token_alone(kind: TokenKind, span: Span) -> TokenStream {
        TokenStream::new(vec![TokenTree::token_alone(kind, span)])
    }

    pub fn from_ast(node: &(impl HasAttrs + HasTokens + fmt::Debug)) -> TokenStream {
        let tokens = node.tokens().unwrap_or_else(|| panic!("missing tokens for node: {:?}", node));
        let mut tts = vec![];
        attrs_and_tokens_to_token_trees(node.attrs(), tokens, &mut tts);
        TokenStream::new(tts)
    }

    // If `vec` is not empty, try to glue `tt` onto its last token. The return
    // value indicates if gluing took place.
    fn try_glue_to_last(vec: &mut Vec<TokenTree>, tt: &TokenTree) -> bool {
        if let Some(TokenTree::Token(last_tok, Spacing::Joint | Spacing::JointHidden)) = vec.last()
            && let TokenTree::Token(tok, spacing) = tt
            && let Some(glued_tok) = last_tok.glue(tok)
        {
            // ...then overwrite the last token tree in `vec` with the
            // glued token, and skip the first token tree from `stream`.
            *vec.last_mut().unwrap() = TokenTree::Token(glued_tok, *spacing);
            true
        } else {
            false
        }
    }

    /// Push `tt` onto the end of the stream, possibly gluing it to the last
    /// token. Uses `make_mut` to maximize efficiency.
    pub fn push_tree(&mut self, tt: TokenTree) {
        let vec_mut = Arc::make_mut(&mut self.0);

        if Self::try_glue_to_last(vec_mut, &tt) {
            // nothing else to do
        } else {
            vec_mut.push(tt);
        }
    }
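
    // For example (a sketch; spans are placeholders), pushing `=` after a
    // `Joint` `>` glues the pair into a single `>=` token:
    //
    //     let mut ts = TokenStream::new(vec![TokenTree::token_joint(token::Gt, DUMMY_SP)]);
    //     ts.push_tree(TokenTree::token_alone(token::Eq, DUMMY_SP));
    //     assert_eq!(ts.len(), 1); // one glued `>=` tree, not two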

    /// Push `stream` onto the end of the stream, possibly gluing the first
    /// token tree to the last token. (No other token trees will be glued.)
    /// Uses `make_mut` to maximize efficiency.
    pub fn push_stream(&mut self, stream: TokenStream) {
        let vec_mut = Arc::make_mut(&mut self.0);

        let stream_iter = stream.0.iter().cloned();

        if let Some(first) = stream.0.first()
            && Self::try_glue_to_last(vec_mut, first)
        {
            // Now skip the first token tree from `stream`.
            vec_mut.extend(stream_iter.skip(1));
        } else {
            // Append all of `stream`.
            vec_mut.extend(stream_iter);
        }
    }

    pub fn chunks(&self, chunk_size: usize) -> core::slice::Chunks<'_, TokenTree> {
        self.0.chunks(chunk_size)
    }

    /// Desugar doc comments like `/// foo` in the stream into `#[doc =
    /// r"foo"]`. Modifies the `TokenStream` via `Arc::make_mut`, but as little
    /// as possible.
    pub fn desugar_doc_comments(&mut self) {
        if let Some(desugared_stream) = desugar_inner(self.clone()) {
            *self = desugared_stream;
        }

        // The return value is `None` if nothing in `stream` changed.
        fn desugar_inner(mut stream: TokenStream) -> Option<TokenStream> {
            let mut i = 0;
            let mut modified = false;
            while let Some(tt) = stream.0.get(i) {
                match tt {
                    &TokenTree::Token(
                        Token { kind: token::DocComment(_, attr_style, data), span },
                        _spacing,
                    ) => {
                        let desugared = desugared_tts(attr_style, data, span);
                        let desugared_len = desugared.len();
                        Arc::make_mut(&mut stream.0).splice(i..i + 1, desugared);
                        modified = true;
                        i += desugared_len;
                    }

                    &TokenTree::Token(..) => i += 1,

                    &TokenTree::Delimited(sp, spacing, delim, ref delim_stream) => {
                        if let Some(desugared_delim_stream) = desugar_inner(delim_stream.clone()) {
                            let new_tt =
                                TokenTree::Delimited(sp, spacing, delim, desugared_delim_stream);
                            Arc::make_mut(&mut stream.0)[i] = new_tt;
                            modified = true;
                        }
                        i += 1;
                    }
                }
            }
            if modified { Some(stream) } else { None }
        }

        fn desugared_tts(attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
            // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
            // required to wrap the text. E.g.
            // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
            // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
            // - `abc "##d##"` is wrapped as `r###"abc "##d##""###` (num_of_hashes = 3)
            let mut num_of_hashes = 0;
            let mut count = 0;
            for ch in data.as_str().chars() {
                count = match ch {
                    '"' => 1,
                    '#' if count > 0 => count + 1,
                    _ => 0,
                };
                num_of_hashes = cmp::max(num_of_hashes, count);
            }

            // `/// foo` becomes `[doc = r"foo"]`.
            let delim_span = DelimSpan::from_single(span);
            let body = TokenTree::Delimited(
                delim_span,
                DelimSpacing::new(Spacing::JointHidden, Spacing::Alone),
                Delimiter::Bracket,
                [
                    TokenTree::token_alone(token::Ident(sym::doc, token::IdentIsRaw::No), span),
                    TokenTree::token_alone(token::Eq, span),
                    TokenTree::token_alone(
                        TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
                        span,
                    ),
                ]
                .into_iter()
                .collect::<TokenStream>(),
            );

            if attr_style == AttrStyle::Inner {
                vec![
                    TokenTree::token_joint(token::Pound, span),
                    TokenTree::token_joint_hidden(token::Bang, span),
                    body,
                ]
            } else {
                vec![TokenTree::token_joint_hidden(token::Pound, span), body]
            }
        }
    }
}

#[derive(Clone)]
pub struct TokenStreamIter<'t> {
    stream: &'t TokenStream,
    index: usize,
}

impl<'t> TokenStreamIter<'t> {
    fn new(stream: &'t TokenStream) -> Self {
        TokenStreamIter { stream, index: 0 }
    }

    // Peeking could be done via `Peekable`, but most iterators need peeking,
    // and this is simple and avoids the need to use `peekable` and `Peekable`
    // at all the use sites.
    pub fn peek(&self) -> Option<&'t TokenTree> {
        self.stream.0.get(self.index)
    }
}

impl<'t> Iterator for TokenStreamIter<'t> {
    type Item = &'t TokenTree;

    fn next(&mut self) -> Option<&'t TokenTree> {
        self.stream.0.get(self.index).map(|tree| {
            self.index += 1;
            tree
        })
    }
}

#[derive(Clone, Debug)]
pub struct TokenTreeCursor {
    stream: TokenStream,
    /// Points to the current token tree in the stream. In `TokenCursor::curr`,
    /// this can be any token tree. In `TokenCursor::stack`, this is always a
    /// `TokenTree::Delimited`.
    index: usize,
}

impl TokenTreeCursor {
    #[inline]
    pub fn new(stream: TokenStream) -> Self {
        TokenTreeCursor { stream, index: 0 }
    }

    #[inline]
    pub fn curr(&self) -> Option<&TokenTree> {
        self.stream.get(self.index)
    }

    pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
        self.stream.get(self.index + n)
    }

    #[inline]
    pub fn bump(&mut self) {
        self.index += 1;
    }
}

/// A `TokenStream` cursor that produces `Token`s. It's a bit odd that
/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
/// use this type to emit them as a linear sequence. But a linear sequence is
/// what the parser expects, for the most part.
#[derive(Clone, Debug)]
pub struct TokenCursor {
    // Cursor for the current (innermost) token stream. The index within the
    // cursor can point to any token tree in the stream (or one past the end).
    // The delimiters for this token stream are found in `self.stack.last()`;
    // if that is `None` we are in the outermost token stream which never has
    // delimiters.
    pub curr: TokenTreeCursor,

    // Token streams surrounding the current one. The index within each cursor
    // always points to a `TokenTree::Delimited`.
    pub stack: Vec<TokenTreeCursor>,
}

impl TokenCursor {
    pub fn next(&mut self) -> (Token, Spacing) {
        self.inlined_next()
    }

    /// This always-inlined version should only be used on hot code paths.
    #[inline(always)]
    pub fn inlined_next(&mut self) -> (Token, Spacing) {
        loop {
            // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
            // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
            // below can be removed.
            if let Some(tree) = self.curr.curr() {
                match tree {
                    &TokenTree::Token(token, spacing) => {
                        debug_assert!(!token.kind.is_delim());
                        let res = (token, spacing);
                        self.curr.bump();
                        return res;
                    }
                    &TokenTree::Delimited(sp, spacing, delim, ref tts) => {
                        let trees = TokenTreeCursor::new(tts.clone());
                        self.stack.push(mem::replace(&mut self.curr, trees));
                        if !delim.skip() {
                            return (Token::new(delim.as_open_token_kind(), sp.open), spacing.open);
                        }
                        // No open delimiter to return; continue on to the next iteration.
                    }
                };
            } else if let Some(parent) = self.stack.pop() {
                // We have exhausted this token stream. Move back to its parent token stream.
                let Some(&TokenTree::Delimited(span, spacing, delim, _)) = parent.curr() else {
                    panic!("parent should be Delimited")
                };
                self.curr = parent;
                self.curr.bump(); // move past the `Delimited`
                if !delim.skip() {
                    return (Token::new(delim.as_close_token_kind(), span.close), spacing.close);
                }
                // No close delimiter to return; continue on to the next iteration.
            } else {
                // We have exhausted the outermost token stream. The use of
                // `Spacing::Alone` is arbitrary and immaterial, because the
                // `Eof` token's spacing is never used.
                return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
            }
        }
    }
}
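
// For example (illustrative), if `stream` holds the tokens of `{ a }`, a
// cursor over it flattens the tree back into a linear token sequence:
//
//     let mut cursor = TokenCursor { curr: TokenTreeCursor::new(stream), stack: vec![] };
//     let (tok, _spacing) = cursor.next(); // the open `{`
//     // subsequent calls yield `a`, then `}`, then `Eof`, `Eof`, ...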

#[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
pub struct DelimSpan {
    pub open: Span,
    pub close: Span,
}

impl DelimSpan {
    pub fn from_single(sp: Span) -> Self {
        DelimSpan { open: sp, close: sp }
    }

    pub fn from_pair(open: Span, close: Span) -> Self {
        DelimSpan { open, close }
    }

    pub fn dummy() -> Self {
        Self::from_single(DUMMY_SP)
    }

    pub fn entire(self) -> Span {
        self.open.with_hi(self.close.hi())
    }
}
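
// For example, for `( a )` the `open` span covers `(`, the `close` span
// covers `)`, and `entire()` covers the whole `( a )` range.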

#[derive(Copy, Clone, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
pub struct DelimSpacing {
    pub open: Spacing,
    pub close: Spacing,
}

impl DelimSpacing {
    pub fn new(open: Spacing, close: Spacing) -> DelimSpacing {
        DelimSpacing { open, close }
    }
}

// Some types are used a lot. Make sure they don't unintentionally get bigger.
#[cfg(target_pointer_width = "64")]
mod size_asserts {
    use rustc_data_structures::static_assert_size;

    use super::*;
    // tidy-alphabetical-start
    static_assert_size!(AttrTokenStream, 8);
    static_assert_size!(AttrTokenTree, 32);
    static_assert_size!(LazyAttrTokenStream, 8);
    static_assert_size!(LazyAttrTokenStreamInner, 88);
    static_assert_size!(Option<LazyAttrTokenStream>, 8); // must be small, used in many AST nodes
    static_assert_size!(TokenStream, 8);
    static_assert_size!(TokenTree, 32);
    // tidy-alphabetical-end
}