rustc_parse/parser/
attr_wrapper.rs

1use std::borrow::Cow;
2use std::mem;
3
4use rustc_ast::token::Token;
5use rustc_ast::tokenstream::{
6    AttrsTarget, LazyAttrTokenStream, NodeRange, ParserRange, Spacing, TokenCursor,
7};
8use rustc_ast::{self as ast, AttrVec, Attribute, HasAttrs, HasTokens};
9use rustc_data_structures::fx::FxHashSet;
10use rustc_errors::PResult;
11use rustc_session::parse::ParseSess;
12use rustc_span::{DUMMY_SP, sym};
13use thin_vec::ThinVec;
14
15use super::{Capturing, ForceCollect, Parser, Trailing};
16
/// When collecting tokens, this fully captures the start point. Usually it's
/// just after outer attributes, but occasionally it's before.
///
/// Instances are created by `Parser::collect_pos`, which snapshots the
/// parser's current token, token cursor, and bump count.
#[derive(Clone, Debug)]
pub(super) struct CollectPos {
    // The parser's current token and its spacing at the snapshot point.
    start_token: (Token, Spacing),
    // A clone of the parser's token cursor at the snapshot point.
    cursor_snapshot: TokenCursor,
    // The parser's `num_bump_calls` value at the snapshot point.
    start_pos: u32,
}
25
/// Part of the result returned by the callback passed to
/// `Parser::collect_tokens`. It indicates whether token collection should
/// start from the caller-supplied `pre_attr_pos` rather than from the
/// position recorded when `collect_tokens` was entered.
pub(super) enum UsePreAttrPos {
    /// Keep the start position recorded on entry to `collect_tokens`.
    No,
    /// Use `pre_attr_pos` as the collection start position. The caller of
    /// `collect_tokens` must have supplied a non-`None` `pre_attr_pos`.
    Yes,
}
30
31/// A wrapper type to ensure that the parser handles outer attributes correctly.
32/// When we parse outer attributes, we need to ensure that we capture tokens
33/// for the attribute target. This allows us to perform cfg-expansion on
34/// a token stream before we invoke a derive proc-macro.
35///
36/// This wrapper prevents direct access to the underlying `ast::AttrVec`.
37/// Parsing code can only get access to the underlying attributes
38/// by passing an `AttrWrapper` to `collect_tokens`.
39/// This makes it difficult to accidentally construct an AST node
40/// (which stores an `ast::AttrVec`) without first collecting tokens.
41///
42/// This struct has its own module, to ensure that the parser code
43/// cannot directly access the `attrs` field.
44#[derive(Debug, Clone)]
45pub(super) struct AttrWrapper {
46    attrs: AttrVec,
47    // The start of the outer attributes in the parser's token stream.
48    // This lets us create a `NodeReplacement` for the entire attribute
49    // target, including outer attributes. `None` if there are no outer
50    // attributes.
51    start_pos: Option<u32>,
52}
53
54impl AttrWrapper {
55    pub(super) fn new(attrs: AttrVec, start_pos: u32) -> AttrWrapper {
56        AttrWrapper { attrs, start_pos: Some(start_pos) }
57    }
58
59    pub(super) fn empty() -> AttrWrapper {
60        AttrWrapper { attrs: AttrVec::new(), start_pos: None }
61    }
62
63    pub(super) fn take_for_recovery(self, psess: &ParseSess) -> AttrVec {
64        psess.dcx().span_delayed_bug(
65            self.attrs.get(0).map(|attr| attr.span).unwrap_or(DUMMY_SP),
66            "AttrVec is taken for recovery but no error is produced",
67        );
68
69        self.attrs
70    }
71
72    /// Prepend `self.attrs` to `attrs`.
73    // FIXME: require passing an NT to prevent misuse of this method
74    pub(super) fn prepend_to_nt_inner(mut self, attrs: &mut AttrVec) {
75        mem::swap(attrs, &mut self.attrs);
76        attrs.extend(self.attrs);
77    }
78
79    pub(super) fn is_empty(&self) -> bool {
80        self.attrs.is_empty()
81    }
82}
83
84/// Returns `true` if `attrs` contains a `cfg` or `cfg_attr` attribute
85fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {
86    // NOTE: Builtin attributes like `cfg` and `cfg_attr` cannot be renamed via imports.
87    // Therefore, the absence of a literal `cfg` or `cfg_attr` guarantees that
88    // we don't need to do any eager expansion.
89    attrs.iter().any(|attr| {
90        attr.ident().is_some_and(|ident| ident.name == sym::cfg || ident.name == sym::cfg_attr)
91    })
92}
93
94impl<'a> Parser<'a> {
95    pub(super) fn collect_pos(&self) -> CollectPos {
96        CollectPos {
97            start_token: (self.token, self.token_spacing),
98            cursor_snapshot: self.token_cursor.clone(),
99            start_pos: self.num_bump_calls,
100        }
101    }
102
    /// Parses code with `f`. If appropriate, it records the tokens (in
    /// `LazyAttrTokenStream` form) that were parsed in the result, accessible
    /// via the `HasTokens` trait. The `Trailing` part of the callback's
    /// result indicates if an extra token should be captured, e.g. a comma or
    /// semicolon. The `UsePreAttrPos` part of the callback's result indicates
    /// if we should use `pre_attr_pos` as the collection start position (only
    /// required in a few cases).
    ///
    /// The `attrs` passed in are in `AttrWrapper` form, which is opaque. The
    /// `AttrVec` within is passed to `f`. See the comment on `AttrWrapper` for
    /// details.
    ///
    /// `pre_attr_pos` is the position before the outer attributes (or the node
    /// itself, if no outer attributes are present). It is only needed if `f`
    /// can return `UsePreAttrPos::Yes`. If `f` does return
    /// `UsePreAttrPos::Yes` and collection goes ahead, `pre_attr_pos` is
    /// unwrapped, so passing `None` in that situation panics.
    ///
    /// Any error returned by `f` is propagated to the caller.
    ///
    /// Note: If your callback consumes an opening delimiter (including the
    /// case where `self.token` is an opening delimiter on entry to this
    /// function), you must also consume the corresponding closing delimiter.
    /// E.g. you can consume `something ([{ }])` or `([{}])`, but not `([{}]`.
    /// This restriction isn't a problem in practice, because parsed AST items
    /// always have matching delimiters.
    ///
    /// The following example code will be used to explain things in comments
    /// below. It has an outer attribute and an inner attribute. Parsing it
    /// involves two calls to this method, one of which is indirectly
    /// recursive.
    /// ```ignore (fake attributes)
    /// #[cfg_eval]                         // token pos
    /// mod m {                             //   0.. 3
    ///     #[cfg_attr(cond1, attr1)]       //   3..12
    ///     fn g() {                        //  12..17
    ///         #![cfg_attr(cond2, attr2)]  //  17..27
    ///         let _x = 3;                 //  27..32
    ///     }                               //  32..33
    /// }                                   //  33..34
    /// ```
    pub(super) fn collect_tokens<R: HasAttrs + HasTokens>(
        &mut self,
        pre_attr_pos: Option<CollectPos>,
        attrs: AttrWrapper,
        force_collect: ForceCollect,
        f: impl FnOnce(&mut Self, AttrVec) -> PResult<'a, (R, Trailing, UsePreAttrPos)>,
    ) -> PResult<'a, R> {
        let possible_capture_mode = self.capture_cfg;

        // We must collect if anything could observe the collected tokens, i.e.
        // if any of the following conditions hold.
        // - We are force collecting tokens (because force collection requires
        //   tokens by definition).
        let needs_collection = matches!(force_collect, ForceCollect::Yes)
            // - Any of our outer attributes require tokens.
            || needs_tokens(&attrs.attrs)
            // - Our target supports custom inner attributes (custom
            //   inner attribute invocation might require token capturing).
            || R::SUPPORTS_CUSTOM_INNER_ATTRS
            // - We are in "possible capture mode" (which requires tokens if
            //   the parsed node has `#[cfg]` or `#[cfg_attr]` attributes).
            || possible_capture_mode;
        if !needs_collection {
            // Fast path: just run `f` and keep only the parsed node, dropping
            // the `Trailing` and `UsePreAttrPos` parts of its result.
            return Ok(f(self, attrs.attrs)?.0);
        }

        // Snapshot what we need to describe the captured token range later:
        // the (post-attribute) start point, whether any outer attributes were
        // passed in, and how many parser replacements existed before `f` ran.
        let mut collect_pos = self.collect_pos();
        let has_outer_attrs = !attrs.attrs.is_empty();
        let parser_replacements_start = self.capture_state.parser_replacements.len();

        // We set and restore `Capturing::Yes` on either side of the call to
        // `f`, so we can distinguish the outermost call to `collect_tokens`
        // (e.g. parsing `m` in the example above) from any inner (indirectly
        // recursive) calls (e.g. parsing `g` in the example above). This
        // distinction is used below and in `Parser::parse_inner_attributes`.
        let (mut ret, capture_trailing, use_pre_attr_pos) = {
            let prev_capturing = mem::replace(&mut self.capture_state.capturing, Capturing::Yes);
            let res = f(self, attrs.attrs);
            self.capture_state.capturing = prev_capturing;
            // The capturing state has been restored by now, so an error from
            // `f` can be propagated safely.
            res?
        };

        // - `None`: Our target doesn't support tokens at all (e.g. `NtIdent`).
        // - `Some(None)`: Our target supports tokens and has none.
        // - `Some(Some(_))`: Our target already has tokens set (e.g. we've
        //   parsed something like `#[my_attr] $item`).
        let ret_can_hold_tokens = matches!(ret.tokens_mut(), Some(None));

        // Ignore any attributes we've previously processed. This happens when
        // an inner call to `collect_tokens` returns an AST node and then an
        // outer call ends up with the same AST node without any additional
        // wrapping layer.
        //
        // Note that this loop also records every not-yet-seen attribute id in
        // `seen_attrs` as a side effect.
        let mut seen_indices = FxHashSet::default();
        for (i, attr) in ret.attrs().iter().enumerate() {
            let is_unseen = self.capture_state.seen_attrs.insert(attr.id);
            if !is_unseen {
                seen_indices.insert(i);
            }
        }
        // Borrow the attributes as-is in the common case where none were seen
        // before; only build a filtered copy when some must be dropped.
        let ret_attrs: Cow<'_, [Attribute]> =
            if seen_indices.is_empty() {
                Cow::Borrowed(ret.attrs())
            } else {
                let ret_attrs =
                    ret.attrs()
                        .iter()
                        .enumerate()
                        .filter_map(|(i, attr)| {
                            if seen_indices.contains(&i) { None } else { Some(attr.clone()) }
                        })
                        .collect();
                Cow::Owned(ret_attrs)
            };

        // When we're not in "definite capture mode", then skip collecting and
        // return early if `ret` doesn't support tokens or already has some.
        //
        // Note that this check is independent of `force_collect`. There's no
        // need to collect tokens when we don't support tokens or already have
        // tokens.
        let definite_capture_mode = self.capture_cfg
            && matches!(self.capture_state.capturing, Capturing::Yes)
            && has_cfg_or_cfg_attr(&ret_attrs);
        if !definite_capture_mode && !ret_can_hold_tokens {
            return Ok(ret);
        }

        // This is similar to the `needs_collection` check at the start of this
        // function, but now that we've parsed an AST node we have complete
        // information available. (If we return early here that means the
        // setup, such as cloning the token cursor, was unnecessary. That's
        // hard to avoid.)
        //
        // We must collect if anything could observe the collected tokens, i.e.
        // if any of the following conditions hold.
        // - We are force collecting tokens.
        let needs_collection = matches!(force_collect, ForceCollect::Yes)
            // - Any of our outer *or* inner attributes require tokens.
            //   (`attrs.attrs` was just outer attributes, but `ret.attrs()` is
            //   outer and inner attributes. So this check is more precise than
            //   the earlier `needs_tokens` check, and we don't need to
            //   check `R::SUPPORTS_CUSTOM_INNER_ATTRS`.)
            || needs_tokens(&ret_attrs)
            // - We are in "definite capture mode", which requires that there
            //   are `#[cfg]` or `#[cfg_attr]` attributes. (During normal
            //   non-`capture_cfg` parsing, we don't need any special capturing
            //   for those attributes, because they're builtin.)
            || definite_capture_mode;
        if !needs_collection {
            return Ok(ret);
        }

        // Replace the post-attribute collection start position with the
        // pre-attribute position supplied, if `f` indicated it is necessary.
        // (The caller is responsible for providing a non-`None` `pre_attr_pos`
        // if this is a possibility; otherwise the `unwrap` below panics.)
        if matches!(use_pre_attr_pos, UsePreAttrPos::Yes) {
            collect_pos = pre_attr_pos.unwrap();
        }

        // Replacements pushed while `f` ran are exactly those in
        // `parser_replacements_start..parser_replacements_end`.
        let parser_replacements_end = self.capture_state.parser_replacements.len();

        assert!(
            !(self.break_last_token > 0 && matches!(capture_trailing, Trailing::Yes)),
            "Cannot have break_last_token > 0 and have trailing token"
        );
        assert!(self.break_last_token <= 2, "cannot break token more than twice");

        let end_pos = self.num_bump_calls
            + capture_trailing as u32
            // If we "broke" the last token (e.g. breaking a `>>` token once into `>` + `>`, or
            // breaking a `>>=` token twice into `>` + `>` + `=`), then extend the range of
            // captured tokens to include it, because the parser was not actually bumped past it.
            // (Even if we broke twice, it was still just one token originally, hence the `1`.)
            // When the `LazyAttrTokenStream` gets converted into an `AttrTokenStream`, we will
            // rebreak that final token once or twice.
            + if self.break_last_token == 0 { 0 } else { 1 };

        // Number of tokens in the captured range, counted from the chosen start position.
        let num_calls = end_pos - collect_pos.start_pos;

        // Take the captured `ParserRange`s for any inner attributes that we parsed in
        // `Parser::parse_inner_attributes`, and pair them in a `ParserReplacement` with `None`,
        // which means the relevant tokens will be removed. (More details below.)
        let mut inner_attr_parser_replacements = Vec::new();
        for attr in ret_attrs.iter() {
            if attr.style == ast::AttrStyle::Inner {
                if let Some(inner_attr_parser_range) =
                    self.capture_state.inner_attr_parser_ranges.remove(&attr.id)
                {
                    inner_attr_parser_replacements.push((inner_attr_parser_range, None));
                } else {
                    self.dcx().span_delayed_bug(attr.span, "Missing token range for attribute");
                }
            }
        }

        // This is hot enough for `deep-vector` that checking the conditions for an empty iterator
        // is measurably faster than actually executing the iterator.
        let node_replacements = if parser_replacements_start == parser_replacements_end
            && inner_attr_parser_replacements.is_empty()
        {
            ThinVec::new()
        } else {
            // Grab any replace ranges that occur *inside* the current AST node. Convert them
            // from `ParserRange` form to `NodeRange` form. We will perform the actual
            // replacement only when we convert the `LazyAttrTokenStream` to an
            // `AttrTokenStream`.
            self.capture_state.parser_replacements
                [parser_replacements_start..parser_replacements_end]
                .iter()
                .cloned()
                .chain(inner_attr_parser_replacements)
                .map(|(parser_range, data)| {
                    (NodeRange::new(parser_range, collect_pos.start_pos), data)
                })
                .collect()
        };

        // What is the status here when parsing the example code at the top of this method?
        //
        // When parsing `g`:
        // - `start_pos..end_pos` is `12..33` (`fn g() { ... }`, excluding the outer attr).
        // - `inner_attr_parser_replacements` has one entry (`ParserRange(17..27)`), to
        //   delete the inner attr's tokens.
        //   - This entry is converted to `NodeRange(5..15)` (relative to the `fn`) and put into
        //     the lazy tokens for `g`, i.e. deleting the inner attr from those tokens (if they get
        //     evaluated).
        //   - Those lazy tokens are also put into an `AttrsTarget` that is appended to `self`'s
        //     replace ranges at the bottom of this function, for processing when parsing `m`.
        // - `parser_replacements_start..parser_replacements_end` is empty.
        //
        // When parsing `m`:
        // - `start_pos..end_pos` is `0..34` (`mod m`, excluding the `#[cfg_eval]` attribute).
        // - `inner_attr_parser_replacements` is empty.
        // - `parser_replacements_start..parser_replacements_end` has one entry.
        //   - One `AttrsTarget` (added below when parsing `g`) to replace all of `g` (`3..33`,
        //     including its outer attribute), with:
        //     - `attrs`: includes the outer and the inner attr.
        //     - `tokens`: lazy tokens for `g` (with its inner attr deleted).

        let tokens = LazyAttrTokenStream::new_pending(
            collect_pos.start_token,
            collect_pos.cursor_snapshot,
            num_calls,
            self.break_last_token,
            node_replacements,
        );
        // Tracks whether `tokens` ends up stored anywhere; checked by the assertion at the end.
        let mut tokens_used = false;

        // If in "definite capture mode" we need to register a replace range
        // for the `#[cfg]` and/or `#[cfg_attr]` attrs. This allows us to run
        // eager cfg-expansion on the captured token stream.
        if definite_capture_mode {
            assert!(self.break_last_token == 0, "Should not have unglued last token with cfg attr");

            // What is the status here when parsing the example code at the top of this method?
            //
            // When parsing `g`, we add one entry:
            // - The pushed entry (`ParserRange(3..33)`) has a new `AttrsTarget` with:
            //   - `attrs`: includes the outer and the inner attr.
            //   - `tokens`: lazy tokens for `g` (with its inner attr deleted).
            //
            // When parsing `m`, we do nothing here.

            // Set things up so that the entire AST node that we just parsed, including attributes,
            // will be replaced with `target` in the lazy token stream. This will allow us to
            // cfg-expand this AST node.
            let start_pos =
                if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos };
            let target =
                AttrsTarget { attrs: ret_attrs.iter().cloned().collect(), tokens: tokens.clone() };
            tokens_used = true;
            self.capture_state
                .parser_replacements
                .push((ParserRange(start_pos..end_pos), Some(target)));
        } else if matches!(self.capture_state.capturing, Capturing::No) {
            // Only clear the ranges once we've finished capturing entirely, i.e. we've finished
            // the outermost call to this method.
            self.capture_state.parser_replacements.clear();
            self.capture_state.inner_attr_parser_ranges.clear();
            self.capture_state.seen_attrs.clear();
        }

        // If we support tokens and don't already have them, store the newly captured tokens.
        if let Some(target_tokens @ None) = ret.tokens_mut() {
            tokens_used = true;
            *target_tokens = Some(tokens);
        }

        assert!(tokens_used); // check we didn't create `tokens` unnecessarily
        Ok(ret)
    }
392}
393
394/// Tokens are needed if:
395/// - any non-single-segment attributes (other than doc comments) are present,
396///   e.g. `rustfmt::skip`; or
397/// - any `cfg_attr` attributes are present; or
398/// - any single-segment, non-builtin attributes are present, e.g. `derive`,
399///   `test`, `global_allocator`.
400fn needs_tokens(attrs: &[ast::Attribute]) -> bool {
401    attrs.iter().any(|attr| match attr.ident() {
402        None => !attr.is_doc_comment(),
403        Some(ident) => {
404            ident.name == sym::cfg_attr || !rustc_feature::is_builtin_attr_name(ident.name)
405        }
406    })
407}
rustc_parse/parser/attr_wrapper.rs

rustc_parse/parser/
attr_wrapper.rs