1//! This is an NFA-based parser, which calls out to the main Rust parser for named non-terminals
2//! (which it commits to fully when it hits one in a grammar). There's a set of current NFA threads
3//! and a set of next ones. Instead of NTs, we have a special case for Kleene star. The big-O, in
4//! pathological cases, is worse than traditional use of NFA or Earley parsing, but it's an easier
5//! fit for Macro-by-Example-style rules.
6//!
7//! (In order to prevent the pathological case, we'd need to lazily construct the resulting
8//! `NamedMatch`es at the very end. It'd be a pain, and require more memory to keep around old
9//! matcher positions, but it would also save overhead)
10//!
11//! We don't say this parser uses the Earley algorithm, because it's unnecessarily inaccurate.
12//! The macro parser restricts itself to the features of finite state automata. Earley parsers
13//! can be described as an extension of NFAs with completion rules, prediction rules, and recursion.
14//!
15//! Quick intro to how the parser works:
16//!
17//! A "matcher position" (a.k.a. "position" or "mp") is a dot in the middle of a matcher, usually
18//! written as a `·`. For example `· a $( a )* a b` is one, as is `a $( · a )* a b`.
19//!
20//! The parser walks through the input a token at a time, maintaining a list
21//! of threads consistent with the current position in the input string: `cur_mps`.
22//!
23//! As it processes them, it fills up `eof_mps` with threads that would be valid if
24//! the macro invocation is now over, `bb_mps` with threads that are waiting on
25//! a Rust non-terminal like `$e:expr`, and `next_mps` with threads that are waiting
26//! on a particular token. Most of the logic concerns moving the · through the
27//! repetitions indicated by Kleene stars. The rules for moving the · without
28//! consuming any input are called epsilon transitions. It only advances or calls
29//! out to the real Rust parser when no `cur_mps` threads remain.
30//!
31//! Example:
32//!
33//! ```text, ignore
34//! Start parsing a a a a b against [· a $( a )* a b].
35//!
36//! Remaining input: a a a a b
37//! next: [· a $( a )* a b]
38//!
39//! - - - Advance over an a. - - -
40//!
41//! Remaining input: a a a b
42//! cur: [a · $( a )* a b]
43//! Descend/Skip (first position).
44//! next: [a $( · a )* a b] [a $( a )* · a b].
45//!
46//! - - - Advance over an a. - - -
47//!
48//! Remaining input: a a b
49//! cur: [a $( a · )* a b] [a $( a )* a · b]
50//! Follow epsilon transition: Finish/Repeat (first position)
51//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
52//!
53//! - - - Advance over an a. - - - (this looks exactly like the last step)
54//!
55//! Remaining input: a b
56//! cur: [a $( a · )* a b] [a $( a )* a · b]
57//! Follow epsilon transition: Finish/Repeat (first position)
58//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
59//!
60//! - - - Advance over an a. - - - (this looks exactly like the last step)
61//!
62//! Remaining input: b
63//! cur: [a $( a · )* a b] [a $( a )* a · b]
64//! Follow epsilon transition: Finish/Repeat (first position)
65//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
66//!
67//! - - - Advance over a b. - - -
68//!
69//! Remaining input: ''
70//! eof: [a $( a )* a b ·]
71//! ```
7273use std::borrow::Cow;
74use std::collections::hash_map::Entry::{Occupied, Vacant};
75use std::fmt::Display;
76use std::rc::Rc;
7778pub(crate) use NamedMatch::*;
79pub(crate) use ParseResult::*;
80use rustc_ast::token::{self, DocComment, NonterminalKind, Token, TokenKind};
81use rustc_data_structures::fx::FxHashMap;
82use rustc_errors::ErrorGuaranteed;
83use rustc_lint_defs::pluralize;
84use rustc_parse::parser::{ParseNtResult, Parser, token_descr};
85use rustc_span::{Ident, MacroRulesNormalizedIdent, Span};
8687use crate::mbe::macro_rules::Tracker;
88use crate::mbe::{KleeneOp, TokenTree};
8990/// A unit within a matcher that a `MatcherPos` can refer to. Similar to (and derived from)
91/// `mbe::TokenTree`, but designed specifically for fast and easy traversal during matching.
92/// Notable differences to `mbe::TokenTree`:
93/// - It is non-recursive, i.e. there is no nesting.
94/// - The end pieces of each sequence (the separator, if present, and the Kleene op) are
95/// represented explicitly, as is the very end of the matcher.
96///
97/// This means a matcher can be represented by `&[MatcherLoc]`, and traversal mostly involves
98/// simply incrementing the current matcher position index by one.
99#[derive(#[automatically_derived]
impl ::core::fmt::Debug for MatcherLoc {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
match self {
MatcherLoc::Token { token: __self_0 } =>
::core::fmt::Formatter::debug_struct_field1_finish(f, "Token",
"token", &__self_0),
MatcherLoc::Delimited =>
::core::fmt::Formatter::write_str(f, "Delimited"),
MatcherLoc::Sequence {
op: __self_0,
num_metavar_decls: __self_1,
idx_first_after: __self_2,
next_metavar: __self_3,
seq_depth: __self_4 } =>
::core::fmt::Formatter::debug_struct_field5_finish(f,
"Sequence", "op", __self_0, "num_metavar_decls", __self_1,
"idx_first_after", __self_2, "next_metavar", __self_3,
"seq_depth", &__self_4),
MatcherLoc::SequenceKleeneOpNoSep {
op: __self_0, idx_first: __self_1 } =>
::core::fmt::Formatter::debug_struct_field2_finish(f,
"SequenceKleeneOpNoSep", "op", __self_0, "idx_first",
&__self_1),
MatcherLoc::SequenceSep { separator: __self_0 } =>
::core::fmt::Formatter::debug_struct_field1_finish(f,
"SequenceSep", "separator", &__self_0),
MatcherLoc::SequenceKleeneOpAfterSep { idx_first: __self_0 } =>
::core::fmt::Formatter::debug_struct_field1_finish(f,
"SequenceKleeneOpAfterSep", "idx_first", &__self_0),
MatcherLoc::MetaVarDecl {
span: __self_0,
bind: __self_1,
kind: __self_2,
next_metavar: __self_3,
seq_depth: __self_4 } =>
::core::fmt::Formatter::debug_struct_field5_finish(f,
"MetaVarDecl", "span", __self_0, "bind", __self_1, "kind",
__self_2, "next_metavar", __self_3, "seq_depth", &__self_4),
MatcherLoc::Eof => ::core::fmt::Formatter::write_str(f, "Eof"),
}
}
}Debug, #[automatically_derived]
impl ::core::cmp::PartialEq for MatcherLoc {
#[inline]
fn eq(&self, other: &MatcherLoc) -> bool {
let __self_discr = ::core::intrinsics::discriminant_value(self);
let __arg1_discr = ::core::intrinsics::discriminant_value(other);
__self_discr == __arg1_discr &&
match (self, other) {
(MatcherLoc::Token { token: __self_0 }, MatcherLoc::Token {
token: __arg1_0 }) => __self_0 == __arg1_0,
(MatcherLoc::Sequence {
op: __self_0,
num_metavar_decls: __self_1,
idx_first_after: __self_2,
next_metavar: __self_3,
seq_depth: __self_4 }, MatcherLoc::Sequence {
op: __arg1_0,
num_metavar_decls: __arg1_1,
idx_first_after: __arg1_2,
next_metavar: __arg1_3,
seq_depth: __arg1_4 }) =>
__self_0 == __arg1_0 && __self_1 == __arg1_1 &&
__self_2 == __arg1_2 && __self_3 == __arg1_3 &&
__self_4 == __arg1_4,
(MatcherLoc::SequenceKleeneOpNoSep {
op: __self_0, idx_first: __self_1 },
MatcherLoc::SequenceKleeneOpNoSep {
op: __arg1_0, idx_first: __arg1_1 }) =>
__self_0 == __arg1_0 && __self_1 == __arg1_1,
(MatcherLoc::SequenceSep { separator: __self_0 },
MatcherLoc::SequenceSep { separator: __arg1_0 }) =>
__self_0 == __arg1_0,
(MatcherLoc::SequenceKleeneOpAfterSep { idx_first: __self_0 },
MatcherLoc::SequenceKleeneOpAfterSep { idx_first: __arg1_0
}) => __self_0 == __arg1_0,
(MatcherLoc::MetaVarDecl {
span: __self_0,
bind: __self_1,
kind: __self_2,
next_metavar: __self_3,
seq_depth: __self_4 }, MatcherLoc::MetaVarDecl {
span: __arg1_0,
bind: __arg1_1,
kind: __arg1_2,
next_metavar: __arg1_3,
seq_depth: __arg1_4 }) =>
__self_0 == __arg1_0 && __self_1 == __arg1_1 &&
__self_2 == __arg1_2 && __self_3 == __arg1_3 &&
__self_4 == __arg1_4,
_ => true,
}
}
}PartialEq, #[automatically_derived]
impl ::core::clone::Clone for MatcherLoc {
#[inline]
fn clone(&self) -> MatcherLoc {
match self {
MatcherLoc::Token { token: __self_0 } =>
MatcherLoc::Token {
token: ::core::clone::Clone::clone(__self_0),
},
MatcherLoc::Delimited => MatcherLoc::Delimited,
MatcherLoc::Sequence {
op: __self_0,
num_metavar_decls: __self_1,
idx_first_after: __self_2,
next_metavar: __self_3,
seq_depth: __self_4 } =>
MatcherLoc::Sequence {
op: ::core::clone::Clone::clone(__self_0),
num_metavar_decls: ::core::clone::Clone::clone(__self_1),
idx_first_after: ::core::clone::Clone::clone(__self_2),
next_metavar: ::core::clone::Clone::clone(__self_3),
seq_depth: ::core::clone::Clone::clone(__self_4),
},
MatcherLoc::SequenceKleeneOpNoSep {
op: __self_0, idx_first: __self_1 } =>
MatcherLoc::SequenceKleeneOpNoSep {
op: ::core::clone::Clone::clone(__self_0),
idx_first: ::core::clone::Clone::clone(__self_1),
},
MatcherLoc::SequenceSep { separator: __self_0 } =>
MatcherLoc::SequenceSep {
separator: ::core::clone::Clone::clone(__self_0),
},
MatcherLoc::SequenceKleeneOpAfterSep { idx_first: __self_0 } =>
MatcherLoc::SequenceKleeneOpAfterSep {
idx_first: ::core::clone::Clone::clone(__self_0),
},
MatcherLoc::MetaVarDecl {
span: __self_0,
bind: __self_1,
kind: __self_2,
next_metavar: __self_3,
seq_depth: __self_4 } =>
MatcherLoc::MetaVarDecl {
span: ::core::clone::Clone::clone(__self_0),
bind: ::core::clone::Clone::clone(__self_1),
kind: ::core::clone::Clone::clone(__self_2),
next_metavar: ::core::clone::Clone::clone(__self_3),
seq_depth: ::core::clone::Clone::clone(__self_4),
},
MatcherLoc::Eof => MatcherLoc::Eof,
}
}
}Clone)]
100pub(crate) enum MatcherLoc {
101 Token {
102 token: Token,
103 },
104 Delimited,
105 Sequence {
106 op: KleeneOp,
107 num_metavar_decls: usize,
108 idx_first_after: usize,
109 next_metavar: usize,
110 seq_depth: usize,
111 },
112 SequenceKleeneOpNoSep {
113 op: KleeneOp,
114 idx_first: usize,
115 },
116 SequenceSep {
117 separator: Token,
118 },
119 SequenceKleeneOpAfterSep {
120 idx_first: usize,
121 },
122 MetaVarDecl {
123 span: Span,
124 bind: Ident,
125 kind: NonterminalKind,
126 next_metavar: usize,
127 seq_depth: usize,
128 },
129 Eof,
130}
131132impl MatcherLoc {
133pub(super) fn span(&self) -> Option<Span> {
134match self {
135 MatcherLoc::Token { token } => Some(token.span),
136 MatcherLoc::Delimited => None,
137 MatcherLoc::Sequence { .. } => None,
138 MatcherLoc::SequenceKleeneOpNoSep { .. } => None,
139 MatcherLoc::SequenceSep { .. } => None,
140 MatcherLoc::SequenceKleeneOpAfterSep { .. } => None,
141 MatcherLoc::MetaVarDecl { span, .. } => Some(*span),
142 MatcherLoc::Eof => None,
143 }
144 }
145}
146147impl Displayfor MatcherLoc {
148fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
149match self {
150 MatcherLoc::Token { token } | MatcherLoc::SequenceSep { separator: token } => {
151f.write_fmt(format_args!("{0}", token_descr(token)))write!(f, "{}", token_descr(token))152 }
153 MatcherLoc::MetaVarDecl { bind, kind, .. } => {
154f.write_fmt(format_args!("meta-variable `${0}:{1}`", bind, kind))write!(f, "meta-variable `${bind}:{kind}`")155 }
156 MatcherLoc::Eof => f.write_str("end of macro"),
157158// These are not printed in the diagnostic
159MatcherLoc::Delimited => f.write_str("delimiter"),
160 MatcherLoc::Sequence { .. } => f.write_str("sequence start"),
161 MatcherLoc::SequenceKleeneOpNoSep { .. } => f.write_str("sequence end"),
162 MatcherLoc::SequenceKleeneOpAfterSep { .. } => f.write_str("sequence end"),
163 }
164 }
165}
166167pub(super) fn compute_locs(matcher: &[TokenTree]) -> Vec<MatcherLoc> {
168fn inner(
169 tts: &[TokenTree],
170 locs: &mut Vec<MatcherLoc>,
171 next_metavar: &mut usize,
172 seq_depth: usize,
173 ) {
174for tt in tts {
175match tt {
176 TokenTree::Token(token) => {
177 locs.push(MatcherLoc::Token { token: *token });
178 }
179 TokenTree::Delimited(span, _, delimited) => {
180let open_token = Token::new(delimited.delim.as_open_token_kind(), span.open);
181let close_token = Token::new(delimited.delim.as_close_token_kind(), span.close);
182183 locs.push(MatcherLoc::Delimited);
184 locs.push(MatcherLoc::Token { token: open_token });
185 inner(&delimited.tts, locs, next_metavar, seq_depth);
186 locs.push(MatcherLoc::Token { token: close_token });
187 }
188 TokenTree::Sequence(_, seq) => {
189// We can't determine `idx_first_after` and construct the final
190 // `MatcherLoc::Sequence` until after `inner()` is called and the sequence end
191 // pieces are processed. So we push a dummy value (`Eof` is cheapest to
192 // construct) now, and overwrite it with the proper value below.
193let dummy = MatcherLoc::Eof;
194 locs.push(dummy);
195196let next_metavar_orig = *next_metavar;
197let op = seq.kleene.op;
198let idx_first = locs.len();
199let idx_seq = idx_first - 1;
200 inner(&seq.tts, locs, next_metavar, seq_depth + 1);
201202if let Some(separator) = &seq.separator {
203 locs.push(MatcherLoc::SequenceSep { separator: separator.clone() });
204 locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first });
205 } else {
206 locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first });
207 }
208209// Overwrite the dummy value pushed above with the proper value.
210locs[idx_seq] = MatcherLoc::Sequence {
211 op,
212 num_metavar_decls: seq.num_captures,
213 idx_first_after: locs.len(),
214 next_metavar: next_metavar_orig,
215 seq_depth,
216 };
217 }
218&TokenTree::MetaVarDecl { span, name: bind, kind } => {
219 locs.push(MatcherLoc::MetaVarDecl {
220 span,
221 bind,
222 kind,
223 next_metavar: *next_metavar,
224 seq_depth,
225 });
226*next_metavar += 1;
227 }
228 TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => ::core::panicking::panic("internal error: entered unreachable code")unreachable!(),
229 }
230 }
231 }
232233let mut locs = ::alloc::vec::Vec::new()vec![];
234let mut next_metavar = 0;
235inner(matcher, &mut locs, &mut next_metavar, /* seq_depth */ 0);
236237// A final entry is needed for eof.
238locs.push(MatcherLoc::Eof);
239240locs241}
242243/// A single matcher position, representing the state of matching.
244#[derive(#[automatically_derived]
impl ::core::fmt::Debug for MatcherPos {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field2_finish(f, "MatcherPos",
"idx", &self.idx, "matches", &&self.matches)
}
}Debug)]
245struct MatcherPos {
246/// The index into `TtParser::locs`, which represents the "dot".
247idx: usize,
248249/// The matches made against metavar decls so far. On a successful match, this vector ends up
250 /// with one element per metavar decl in the matcher. Each element records token trees matched
251 /// against the relevant metavar by the black box parser. An element will be a `MatchedSeq` if
252 /// the corresponding metavar decl is within a sequence.
253 ///
254 /// It is critical to performance that this is an `Rc`, because it gets cloned frequently when
255 /// processing sequences. Mostly for sequence-ending possibilities that must be tried but end
256 /// up failing.
257matches: Rc<Vec<NamedMatch>>,
258}
259260// This type is used a lot. Make sure it doesn't unintentionally get bigger.
261#[cfg(target_pointer_width = "64")]
262const _: [(); 16] = [(); ::std::mem::size_of::<MatcherPos>()];rustc_data_structures::static_assert_size!(MatcherPos, 16);
263264impl MatcherPos {
265/// Adds `m` as a named match for the `metavar_idx`-th metavar. There are only two call sites,
266 /// and both are hot enough to be always worth inlining.
267#[inline(always)]
268fn push_match(&mut self, metavar_idx: usize, seq_depth: usize, m: NamedMatch) {
269let matches = Rc::make_mut(&mut self.matches);
270match seq_depth {
2710 => {
272// We are not within a sequence. Just append `m`.
273match (&metavar_idx, &matches.len()) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val, &*right_val,
::core::option::Option::None);
}
}
};assert_eq!(metavar_idx, matches.len());
274matches.push(m);
275 }
276_ => {
277// We are within a sequence. Find the final `MatchedSeq` at the appropriate depth
278 // and append `m` to its vector.
279let mut curr = &mut matches[metavar_idx];
280for _ in 0..seq_depth - 1 {
281match curr {
282 MatchedSeq(seq) => curr = seq.last_mut().unwrap(),
283_ => ::core::panicking::panic("internal error: entered unreachable code")unreachable!(),
284 }
285 }
286match curr {
287MatchedSeq(seq) => seq.push(m),
288_ => ::core::panicking::panic("internal error: entered unreachable code")unreachable!(),
289 }
290 }
291 }
292 }
293}
294295enum EofMatcherPositions {
296None,
297 One(MatcherPos),
298 Multiple,
299}
300301/// Represents the possible results of an attempted parse.
302#[derive(#[automatically_derived]
impl<T: ::core::fmt::Debug, F: ::core::fmt::Debug> ::core::fmt::Debug for
ParseResult<T, F> {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
match self {
ParseResult::Success(__self_0) =>
::core::fmt::Formatter::debug_tuple_field1_finish(f,
"Success", &__self_0),
ParseResult::Failure(__self_0) =>
::core::fmt::Formatter::debug_tuple_field1_finish(f,
"Failure", &__self_0),
ParseResult::Error(__self_0, __self_1) =>
::core::fmt::Formatter::debug_tuple_field2_finish(f, "Error",
__self_0, &__self_1),
ParseResult::ErrorReported(__self_0) =>
::core::fmt::Formatter::debug_tuple_field1_finish(f,
"ErrorReported", &__self_0),
}
}
}Debug)]
303pub(crate) enum ParseResult<T, F> {
304/// Parsed successfully.
305Success(T),
306/// Arm failed to match. If the second parameter is `token::Eof`, it indicates an unexpected
307 /// end of macro invocation. Otherwise, it indicates that no rules expected the given token.
308 /// The usize is the approximate position of the token in the input token stream.
309Failure(F),
310/// Fatal error (malformed macro?). Abort compilation.
311Error(rustc_span::Span, String),
312 ErrorReported(ErrorGuaranteed),
313}
314315/// A `ParseResult` where the `Success` variant contains a mapping of
316/// `MacroRulesNormalizedIdent`s to `NamedMatch`es. This represents the mapping
317/// of metavars to the token trees they bind to.
318pub(crate) type NamedParseResult<F> = ParseResult<NamedMatches, F>;
319320/// Contains a mapping of `MacroRulesNormalizedIdent`s to `NamedMatch`es.
321/// This represents the mapping of metavars to the token trees they bind to.
322pub(crate) type NamedMatches = FxHashMap<MacroRulesNormalizedIdent, NamedMatch>;
323324/// Count how many metavars declarations are in `matcher`.
325pub(super) fn count_metavar_decls(matcher: &[TokenTree]) -> usize {
326matcher327 .iter()
328 .map(|tt| match tt {
329 TokenTree::MetaVarDecl { .. } => 1,
330 TokenTree::Sequence(_, seq) => seq.num_captures,
331 TokenTree::Delimited(.., delim) => count_metavar_decls(&delim.tts),
332 TokenTree::Token(..) => 0,
333 TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => ::core::panicking::panic("internal error: entered unreachable code")unreachable!(),
334 })
335 .sum()
336}
337338/// `NamedMatch` is a pattern-match result for a single metavar. All
339/// `MatchedNonterminal`s in the `NamedMatch` have the same non-terminal type
340/// (expr, item, etc).
341///
342/// The in-memory structure of a particular `NamedMatch` represents the match
343/// that occurred when a particular subset of a matcher was applied to a
344/// particular token tree.
345///
346/// The width of each `MatchedSeq` in the `NamedMatch`, and the identity of
347/// the `MatchedNtNonTts`s, will depend on the token tree it was applied
348/// to: each `MatchedSeq` corresponds to a single repetition in the originating
349/// token tree. The depth of the `NamedMatch` structure will therefore depend
350/// only on the nesting depth of repetitions in the originating token tree it
351/// was derived from.
352///
353/// In layperson's terms: `NamedMatch` will form a tree representing nested matches of a particular
354/// meta variable. For example, if we are matching the following macro against the following
355/// invocation...
356///
357/// ```rust
358/// macro_rules! foo {
359/// ($($($x:ident),+);+) => {}
360/// }
361///
362/// foo!(a, b, c, d; a, b, c, d, e);
363/// ```
364///
365/// Then, the tree will have the following shape:
366///
367/// ```ignore (private-internal)
368/// # use NamedMatch::*;
369/// MatchedSeq([
370/// MatchedSeq([
371/// MatchedNonterminal(a),
372/// MatchedNonterminal(b),
373/// MatchedNonterminal(c),
374/// MatchedNonterminal(d),
375/// ]),
376/// MatchedSeq([
377/// MatchedNonterminal(a),
378/// MatchedNonterminal(b),
379/// MatchedNonterminal(c),
380/// MatchedNonterminal(d),
381/// MatchedNonterminal(e),
382/// ])
383/// ])
384/// ```
385#[derive(#[automatically_derived]
impl ::core::fmt::Debug for NamedMatch {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
match self {
NamedMatch::MatchedSeq(__self_0) =>
::core::fmt::Formatter::debug_tuple_field1_finish(f,
"MatchedSeq", &__self_0),
NamedMatch::MatchedSingle(__self_0) =>
::core::fmt::Formatter::debug_tuple_field1_finish(f,
"MatchedSingle", &__self_0),
}
}
}Debug, #[automatically_derived]
impl ::core::clone::Clone for NamedMatch {
#[inline]
fn clone(&self) -> NamedMatch {
match self {
NamedMatch::MatchedSeq(__self_0) =>
NamedMatch::MatchedSeq(::core::clone::Clone::clone(__self_0)),
NamedMatch::MatchedSingle(__self_0) =>
NamedMatch::MatchedSingle(::core::clone::Clone::clone(__self_0)),
}
}
}Clone)]
386pub(crate) enum NamedMatch {
387 MatchedSeq(Vec<NamedMatch>),
388 MatchedSingle(ParseNtResult),
389}
390391impl NamedMatch {
392pub(super) fn is_repeatable(&self) -> bool {
393match self {
394 NamedMatch::MatchedSeq(_) => true,
395 NamedMatch::MatchedSingle(_) => false,
396 }
397 }
398}
399400/// Performs a token equality check, ignoring syntax context (that is, an unhygienic comparison)
401fn token_name_eq(t1: &Token, t2: &Token) -> bool {
402if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = (t1.ident(), t2.ident()) {
403ident1.name == ident2.name && is_raw1 == is_raw2404 } else if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) =
405 (t1.lifetime(), t2.lifetime())
406 {
407ident1.name == ident2.name && is_raw1 == is_raw2408 } else {
409// Note: we SHOULD NOT use `t1.kind == t2.kind` here, and we should instead compare the
410 // tokens using the special comparison logic below.
411 // It makes sure that variants containing `InvisibleOrigin` will
412 // never compare equal to one another.
413 //
414 // When we had AST-based nonterminals we couldn't compare them, and the
415 // old `Nonterminal` type had an `eq` that always returned false,
416 // resulting in this restriction:
417 // <https://doc.rust-lang.org/nightly/reference/macros-by-example.html#forwarding-a-matched-fragment>
418 // This comparison logic emulates that behaviour. We could consider lifting this
419 // restriction now but there are still cases involving invisible
420 // delimiters that make it harder than it first appears.
421match (t1.kind, t2.kind) {
422 (TokenKind::OpenInvisible(_) | TokenKind::CloseInvisible(_), _)
423 | (_, TokenKind::OpenInvisible(_) | TokenKind::CloseInvisible(_)) => false,
424 (a, b) => a == b,
425 }
426 }
427}
428429// Note: the vectors could be created and dropped within `parse_tt`, but to avoid excess
430// allocations we have a single vector for each kind that is cleared and reused repeatedly.
431pub(crate) struct TtParser {
432 macro_name: Ident,
433434/// The set of current mps to be processed. This should be empty by the end of a successful
435 /// execution of `parse_tt_inner`.
436cur_mps: Vec<MatcherPos>,
437438/// The set of newly generated mps. These are used to replenish `cur_mps` in the function
439 /// `parse_tt`.
440next_mps: Vec<MatcherPos>,
441442/// The set of mps that are waiting for the black-box parser.
443bb_mps: Vec<MatcherPos>,
444445/// Pre-allocate an empty match array, so it can be cloned cheaply for macros with many rules
446 /// that have no metavars.
447empty_matches: Rc<Vec<NamedMatch>>,
448}
449450impl TtParser {
451pub(super) fn new(macro_name: Ident) -> TtParser {
452TtParser {
453macro_name,
454 cur_mps: ::alloc::vec::Vec::new()vec![],
455 next_mps: ::alloc::vec::Vec::new()vec![],
456 bb_mps: ::alloc::vec::Vec::new()vec![],
457 empty_matches: Rc::new(::alloc::vec::Vec::new()vec![]),
458 }
459 }
460461pub(super) fn has_no_remaining_items_for_step(&self) -> bool {
462self.cur_mps.is_empty()
463 }
464465/// Process the matcher positions of `cur_mps` until it is empty. In the process, this will
466 /// produce more mps in `next_mps` and `bb_mps`.
467 ///
468 /// # Returns
469 ///
470 /// `Some(result)` if everything is finished, `None` otherwise. Note that matches are kept
471 /// track of through the mps generated.
472fn parse_tt_inner<'matcher, T: Tracker<'matcher>>(
473&mut self,
474 matcher: &'matcher [MatcherLoc],
475 token: &Token,
476 approx_position: u32,
477 track: &mut T,
478 ) -> Option<NamedParseResult<T::Failure>> {
479// Matcher positions that would be valid if the macro invocation was over now. Only
480 // modified if `token == Eof`.
481let mut eof_mps = EofMatcherPositions::None;
482483while let Some(mut mp) = self.cur_mps.pop() {
484let matcher_loc = &matcher[mp.idx];
485 track.before_match_loc(self, matcher_loc);
486487match matcher_loc {
488 MatcherLoc::Token { token: t } => {
489// If it's a doc comment, we just ignore it and move on to the next tt in the
490 // matcher. This is a bug, but #95267 showed that existing programs rely on
491 // this behaviour, and changing it would require some care and a transition
492 // period.
493 //
494 // If the token matches, we can just advance the parser.
495 //
496 // Otherwise, this match has failed, there is nothing to do, and hopefully
497 // another mp in `cur_mps` will match.
498if #[allow(non_exhaustive_omitted_patterns)] match t {
Token { kind: DocComment(..), .. } => true,
_ => false,
}matches!(t, Token { kind: DocComment(..), .. }) {
499 mp.idx += 1;
500self.cur_mps.push(mp);
501 } else if token_name_eq(t, token) {
502 mp.idx += 1;
503self.next_mps.push(mp);
504 }
505 }
506 MatcherLoc::Delimited => {
507// Entering the delimiter is trivial.
508mp.idx += 1;
509self.cur_mps.push(mp);
510 }
511&MatcherLoc::Sequence {
512 op,
513 num_metavar_decls,
514 idx_first_after,
515 next_metavar,
516 seq_depth,
517 } => {
518// Install an empty vec for each metavar within the sequence.
519for metavar_idx in next_metavar..next_metavar + num_metavar_decls {
520 mp.push_match(metavar_idx, seq_depth, MatchedSeq(::alloc::vec::Vec::new()vec![]));
521 }
522523if #[allow(non_exhaustive_omitted_patterns)] match op {
KleeneOp::ZeroOrMore | KleeneOp::ZeroOrOne => true,
_ => false,
}matches!(op, KleeneOp::ZeroOrMore | KleeneOp::ZeroOrOne) {
524// Try zero matches of this sequence, by skipping over it.
525self.cur_mps.push(MatcherPos {
526 idx: idx_first_after,
527 matches: Rc::clone(&mp.matches),
528 });
529 }
530531// Try one or more matches of this sequence, by entering it.
532mp.idx += 1;
533self.cur_mps.push(mp);
534 }
535&MatcherLoc::SequenceKleeneOpNoSep { op, idx_first } => {
536// We are past the end of a sequence with no separator. Try ending the
537 // sequence. If that's not possible, `ending_mp` will fail quietly when it is
538 // processed next time around the loop.
539let ending_mp = MatcherPos {
540 idx: mp.idx + 1, // +1 skips the Kleene op
541matches: Rc::clone(&mp.matches),
542 };
543self.cur_mps.push(ending_mp);
544545if op != KleeneOp::ZeroOrOne {
546// Try another repetition.
547mp.idx = idx_first;
548self.cur_mps.push(mp);
549 }
550 }
551 MatcherLoc::SequenceSep { separator } => {
552// We are past the end of a sequence with a separator but we haven't seen the
553 // separator yet. Try ending the sequence. If that's not possible, `ending_mp`
554 // will fail quietly when it is processed next time around the loop.
555let ending_mp = MatcherPos {
556 idx: mp.idx + 2, // +2 skips the separator and the Kleene op
557matches: Rc::clone(&mp.matches),
558 };
559self.cur_mps.push(ending_mp);
560561if token_name_eq(token, separator) {
562// The separator matches the current token. Advance past it.
563mp.idx += 1;
564self.next_mps.push(mp);
565 }
566 }
567&MatcherLoc::SequenceKleeneOpAfterSep { idx_first } => {
568// We are past the sequence separator. This can't be a `?` Kleene op, because
569 // they don't permit separators. Try another repetition.
570mp.idx = idx_first;
571self.cur_mps.push(mp);
572 }
573&MatcherLoc::MetaVarDecl { kind, .. } => {
574// Built-in nonterminals never start with these tokens, so we can eliminate
575 // them from consideration. We use the span of the metavariable declaration
576 // to determine any edition-specific matching behavior for non-terminals.
577if Parser::nonterminal_may_begin_with(kind, token) {
578self.bb_mps.push(mp);
579 }
580 }
581 MatcherLoc::Eof => {
582// We are past the matcher's end, and not in a sequence. Try to end things.
583if true {
match (&mp.idx, &(matcher.len() - 1)) {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(mp.idx, matcher.len() - 1);
584if *token == token::Eof {
585 eof_mps = match eof_mps {
586 EofMatcherPositions::None => EofMatcherPositions::One(mp),
587 EofMatcherPositions::One(_) | EofMatcherPositions::Multiple => {
588 EofMatcherPositions::Multiple
589 }
590 }
591 }
592 }
593 }
594 }
595596// If we reached the end of input, check that there is EXACTLY ONE possible matcher.
597 // Otherwise, either the parse is ambiguous (which is an error) or there is a syntax error.
598if *token == token::Eof {
599Some(match eof_mps {
600 EofMatcherPositions::One(mut eof_mp) => {
601// Need to take ownership of the matches from within the `Rc`.
602Rc::make_mut(&mut eof_mp.matches);
603let matches = Rc::try_unwrap(eof_mp.matches).unwrap().into_iter();
604self.nameize(matcher, matches)
605 }
606 EofMatcherPositions::Multiple => {
607Error(token.span, "ambiguity: multiple successful parses".to_string())
608 }
609 EofMatcherPositions::None => Failure(T::build_failure(
610Token::new(
611 token::Eof,
612if token.span.is_dummy() { token.span } else { token.span.shrink_to_hi() },
613 ),
614approx_position,
615"missing tokens in macro arguments",
616 )),
617 })
618 } else {
619None620 }
621 }
622623/// Match the token stream from `parser` against `matcher`.
624pub(super) fn parse_tt<'matcher, T: Tracker<'matcher>>(
625&mut self,
626 parser: &mut Cow<'_, Parser<'_>>,
627 matcher: &'matcher [MatcherLoc],
628 track: &mut T,
629 ) -> NamedParseResult<T::Failure> {
630// A queue of possible matcher positions. We initialize it with the matcher position in
631 // which the "dot" is before the first token of the first token tree in `matcher`.
632 // `parse_tt_inner` then processes all of these possible matcher positions and produces
633 // possible next positions into `next_mps`. After some post-processing, the contents of
634 // `next_mps` replenish `cur_mps` and we start over again.
635self.cur_mps.clear();
636self.cur_mps.push(MatcherPos { idx: 0, matches: Rc::clone(&self.empty_matches) });
637638loop {
639self.next_mps.clear();
640self.bb_mps.clear();
641642// Process `cur_mps` until either we have finished the input or we need to get some
643 // parsing from the black-box parser done.
644let res = self.parse_tt_inner(
645matcher,
646&parser.token,
647parser.approx_token_stream_pos(),
648track,
649 );
650651if let Some(res) = res {
652return res;
653 }
654655// `parse_tt_inner` handled all of `cur_mps`, so it's empty.
656if !self.cur_mps.is_empty() {
::core::panicking::panic("assertion failed: self.cur_mps.is_empty()")
};assert!(self.cur_mps.is_empty());
657658// Error messages here could be improved with links to original rules.
659match (self.next_mps.len(), self.bb_mps.len()) {
660 (0, 0) => {
661// There are no possible next positions AND we aren't waiting for the black-box
662 // parser: syntax error.
663return Failure(T::build_failure(
664parser.token,
665parser.approx_token_stream_pos(),
666"no rules expected this token in macro call",
667 ));
668 }
669670 (_, 0) => {
671// Dump all possible `next_mps` into `cur_mps` for the next iteration. Then
672 // process the next token.
673self.cur_mps.append(&mut self.next_mps);
674parser.to_mut().bump();
675 }
676677 (0, 1) => {
678// We need to call the black-box parser to get some nonterminal.
679let mut mp = self.bb_mps.pop().unwrap();
680let loc = &matcher[mp.idx];
681if let &MatcherLoc::MetaVarDecl {
682 span, kind, next_metavar, seq_depth, ..
683 } = loc684 {
685// We use the span of the metavariable declaration to determine any
686 // edition-specific matching behavior for non-terminals.
687let nt = match parser.to_mut().parse_nonterminal(kind) {
688Err(err) => {
689let guarantee = err.with_span_label(
690span,
691::alloc::__export::must_use({
::alloc::fmt::format(format_args!("while parsing argument for this `{0}` macro fragment",
kind))
})format!(
692"while parsing argument for this `{kind}` macro fragment"
693),
694 )
695 .emit();
696return ErrorReported(guarantee);
697 }
698Ok(nt) => nt,
699 };
700mp.push_match(next_metavar, seq_depth, MatchedSingle(nt));
701mp.idx += 1;
702 } else {
703::core::panicking::panic("internal error: entered unreachable code")unreachable!()704 }
705self.cur_mps.push(mp);
706 }
707708 (_, _) => {
709// Too many possibilities!
710return self.ambiguity_error(matcher, parser.token.span);
711 }
712 }
713714if !!self.cur_mps.is_empty() {
::core::panicking::panic("assertion failed: !self.cur_mps.is_empty()")
};assert!(!self.cur_mps.is_empty());
715 }
716 }
717718fn ambiguity_error<F>(
719&self,
720 matcher: &[MatcherLoc],
721 token_span: rustc_span::Span,
722 ) -> NamedParseResult<F> {
723let nts = self724 .bb_mps
725 .iter()
726 .map(|mp| match &matcher[mp.idx] {
727 MatcherLoc::MetaVarDecl { bind, kind, .. } => {
728::alloc::__export::must_use({
::alloc::fmt::format(format_args!("{0} (\'{1}\')", kind, bind))
})format!("{kind} ('{bind}')")729 }
730_ => ::core::panicking::panic("internal error: entered unreachable code")unreachable!(),
731 })
732 .collect::<Vec<String>>()
733 .join(" or ");
734735Error(
736token_span,
737::alloc::__export::must_use({
::alloc::fmt::format(format_args!("local ambiguity when calling macro `{0}`: multiple parsing options: {1}",
self.macro_name,
match self.next_mps.len() {
0 =>
::alloc::__export::must_use({
::alloc::fmt::format(format_args!("built-in NTs {0}.", nts))
}),
n =>
::alloc::__export::must_use({
::alloc::fmt::format(format_args!("built-in NTs {1} or {2} other option{0}.",
if n == 1 { "" } else { "s" }, nts, n))
}),
}))
})format!(
738"local ambiguity when calling macro `{}`: multiple parsing options: {}",
739self.macro_name,
740match self.next_mps.len() {
7410 => format!("built-in NTs {nts}."),
742 n => format!("built-in NTs {nts} or {n} other option{s}.", s = pluralize!(n)),
743 }
744 ),
745 )
746 }
747748fn nameize<I: Iterator<Item = NamedMatch>, F>(
749&self,
750 matcher: &[MatcherLoc],
751mut res: I,
752 ) -> NamedParseResult<F> {
753// Make that each metavar has _exactly one_ binding. If so, insert the binding into the
754 // `NamedParseResult`. Otherwise, it's an error.
755let mut ret_val = FxHashMap::default();
756for loc in matcher {
757if let &MatcherLoc::MetaVarDecl { span, bind, .. } = loc {
758match ret_val.entry(MacroRulesNormalizedIdent::new(bind)) {
759 Vacant(spot) => spot.insert(res.next().unwrap()),
760 Occupied(..) => {
761return Error(span, ::alloc::__export::must_use({
::alloc::fmt::format(format_args!("duplicated bind name: {0}", bind))
})format!("duplicated bind name: {bind}"));
762 }
763 };
764 }
765 }
766Success(ret_val)
767 }
768}