rustc_expand/mbe/macro_parser.rs
1//! This is an NFA-based parser, which calls out to the main Rust parser for named non-terminals
2//! (which it commits to fully when it hits one in a grammar). There's a set of current NFA threads
3//! and a set of next ones. Instead of NTs, we have a special case for Kleene star. The big-O, in
4//! pathological cases, is worse than traditional use of NFA or Earley parsing, but it's an easier
5//! fit for Macro-by-Example-style rules.
6//!
7//! (In order to prevent the pathological case, we'd need to lazily construct the resulting
8//! `NamedMatch`es at the very end. It'd be a pain, and require more memory to keep around old
9//! matcher positions, but it would also save overhead)
10//!
11//! We don't say this parser uses the Earley algorithm, because it's unnecessarily inaccurate.
12//! The macro parser restricts itself to the features of finite state automata. Earley parsers
13//! can be described as an extension of NFAs with completion rules, prediction rules, and recursion.
14//!
15//! Quick intro to how the parser works:
16//!
17//! A "matcher position" (a.k.a. "position" or "mp") is a dot in the middle of a matcher, usually
18//! written as a `·`. For example `· a $( a )* a b` is one, as is `a $( · a )* a b`.
19//!
20//! The parser walks through the input a token at a time, maintaining a list
21//! of threads consistent with the current position in the input string: `cur_mps`.
22//!
23//! As it processes them, it fills up `eof_mps` with threads that would be valid if
24//! the macro invocation is now over, `bb_mps` with threads that are waiting on
25//! a Rust non-terminal like `$e:expr`, and `next_mps` with threads that are waiting
26//! on a particular token. Most of the logic concerns moving the · through the
27//! repetitions indicated by Kleene stars. The rules for moving the · without
28//! consuming any input are called epsilon transitions. It only advances or calls
29//! out to the real Rust parser when no `cur_mps` threads remain.
30//!
31//! Example:
32//!
33//! ```text, ignore
34//! Start parsing a a a a b against [· a $( a )* a b].
35//!
36//! Remaining input: a a a a b
37//! next: [· a $( a )* a b]
38//!
39//! - - - Advance over an a. - - -
40//!
41//! Remaining input: a a a b
42//! cur: [a · $( a )* a b]
43//! Descend/Skip (first position).
44//! next: [a $( · a )* a b] [a $( a )* · a b].
45//!
46//! - - - Advance over an a. - - -
47//!
48//! Remaining input: a a b
49//! cur: [a $( a · )* a b] [a $( a )* a · b]
50//! Follow epsilon transition: Finish/Repeat (first position)
51//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
52//!
53//! - - - Advance over an a. - - - (this looks exactly like the last step)
54//!
55//! Remaining input: a b
56//! cur: [a $( a · )* a b] [a $( a )* a · b]
57//! Follow epsilon transition: Finish/Repeat (first position)
58//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
59//!
60//! - - - Advance over an a. - - - (this looks exactly like the last step)
61//!
62//! Remaining input: b
63//! cur: [a $( a · )* a b] [a $( a )* a · b]
64//! Follow epsilon transition: Finish/Repeat (first position)
65//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
66//!
67//! - - - Advance over a b. - - -
68//!
69//! Remaining input: ''
70//! eof: [a $( a )* a b ·]
71//! ```
72
73use std::borrow::Cow;
74use std::collections::hash_map::Entry::{Occupied, Vacant};
75use std::fmt::Display;
76use std::rc::Rc;
77
78pub(crate) use NamedMatch::*;
79pub(crate) use ParseResult::*;
80use rustc_ast::token::{self, DocComment, NonterminalKind, Token};
81use rustc_data_structures::fx::FxHashMap;
82use rustc_errors::ErrorGuaranteed;
83use rustc_lint_defs::pluralize;
84use rustc_parse::parser::{ParseNtResult, Parser, token_descr};
85use rustc_span::{Ident, MacroRulesNormalizedIdent, Span};
86
87use crate::mbe::macro_rules::Tracker;
88use crate::mbe::{KleeneOp, TokenTree};
89
90/// A unit within a matcher that a `MatcherPos` can refer to. Similar to (and derived from)
91/// `mbe::TokenTree`, but designed specifically for fast and easy traversal during matching.
92/// Notable differences to `mbe::TokenTree`:
93/// - It is non-recursive, i.e. there is no nesting.
94/// - The end pieces of each sequence (the separator, if present, and the Kleene op) are
95/// represented explicitly, as is the very end of the matcher.
96///
97/// This means a matcher can be represented by `&[MatcherLoc]`, and traversal mostly involves
98/// simply incrementing the current matcher position index by one.
99#[derive(Debug, PartialEq, Clone)]
100pub(crate) enum MatcherLoc {
101 Token {
102 token: Token,
103 },
104 Delimited,
105 Sequence {
106 op: KleeneOp,
107 num_metavar_decls: usize,
108 idx_first_after: usize,
109 next_metavar: usize,
110 seq_depth: usize,
111 },
112 SequenceKleeneOpNoSep {
113 op: KleeneOp,
114 idx_first: usize,
115 },
116 SequenceSep {
117 separator: Token,
118 },
119 SequenceKleeneOpAfterSep {
120 idx_first: usize,
121 },
122 MetaVarDecl {
123 span: Span,
124 bind: Ident,
125 kind: Option<NonterminalKind>,
126 next_metavar: usize,
127 seq_depth: usize,
128 },
129 Eof,
130}
131
132impl MatcherLoc {
133 pub(super) fn span(&self) -> Option<Span> {
134 match self {
135 MatcherLoc::Token { token } => Some(token.span),
136 MatcherLoc::Delimited => None,
137 MatcherLoc::Sequence { .. } => None,
138 MatcherLoc::SequenceKleeneOpNoSep { .. } => None,
139 MatcherLoc::SequenceSep { .. } => None,
140 MatcherLoc::SequenceKleeneOpAfterSep { .. } => None,
141 MatcherLoc::MetaVarDecl { span, .. } => Some(*span),
142 MatcherLoc::Eof => None,
143 }
144 }
145}
146
147impl Display for MatcherLoc {
148 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
149 match self {
150 MatcherLoc::Token { token } | MatcherLoc::SequenceSep { separator: token } => {
151 write!(f, "{}", token_descr(token))
152 }
153 MatcherLoc::MetaVarDecl { bind, kind, .. } => {
154 write!(f, "meta-variable `${bind}")?;
155 if let Some(kind) = kind {
156 write!(f, ":{kind}")?;
157 }
158 write!(f, "`")?;
159 Ok(())
160 }
161 MatcherLoc::Eof => f.write_str("end of macro"),
162
163 // These are not printed in the diagnostic
164 MatcherLoc::Delimited => f.write_str("delimiter"),
165 MatcherLoc::Sequence { .. } => f.write_str("sequence start"),
166 MatcherLoc::SequenceKleeneOpNoSep { .. } => f.write_str("sequence end"),
167 MatcherLoc::SequenceKleeneOpAfterSep { .. } => f.write_str("sequence end"),
168 }
169 }
170}
171
172pub(super) fn compute_locs(matcher: &[TokenTree]) -> Vec<MatcherLoc> {
173 fn inner(
174 tts: &[TokenTree],
175 locs: &mut Vec<MatcherLoc>,
176 next_metavar: &mut usize,
177 seq_depth: usize,
178 ) {
179 for tt in tts {
180 match tt {
181 TokenTree::Token(token) => {
182 locs.push(MatcherLoc::Token { token: token.clone() });
183 }
184 TokenTree::Delimited(span, _, delimited) => {
185 let open_token = Token::new(token::OpenDelim(delimited.delim), span.open);
186 let close_token = Token::new(token::CloseDelim(delimited.delim), span.close);
187
188 locs.push(MatcherLoc::Delimited);
189 locs.push(MatcherLoc::Token { token: open_token });
190 inner(&delimited.tts, locs, next_metavar, seq_depth);
191 locs.push(MatcherLoc::Token { token: close_token });
192 }
193 TokenTree::Sequence(_, seq) => {
194 // We can't determine `idx_first_after` and construct the final
195 // `MatcherLoc::Sequence` until after `inner()` is called and the sequence end
196 // pieces are processed. So we push a dummy value (`Eof` is cheapest to
197 // construct) now, and overwrite it with the proper value below.
198 let dummy = MatcherLoc::Eof;
199 locs.push(dummy);
200
201 let next_metavar_orig = *next_metavar;
202 let op = seq.kleene.op;
203 let idx_first = locs.len();
204 let idx_seq = idx_first - 1;
205 inner(&seq.tts, locs, next_metavar, seq_depth + 1);
206
207 if let Some(separator) = &seq.separator {
208 locs.push(MatcherLoc::SequenceSep { separator: separator.clone() });
209 locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first });
210 } else {
211 locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first });
212 }
213
214 // Overwrite the dummy value pushed above with the proper value.
215 locs[idx_seq] = MatcherLoc::Sequence {
216 op,
217 num_metavar_decls: seq.num_captures,
218 idx_first_after: locs.len(),
219 next_metavar: next_metavar_orig,
220 seq_depth,
221 };
222 }
223 &TokenTree::MetaVarDecl(span, bind, kind) => {
224 locs.push(MatcherLoc::MetaVarDecl {
225 span,
226 bind,
227 kind,
228 next_metavar: *next_metavar,
229 seq_depth,
230 });
231 *next_metavar += 1;
232 }
233 TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
234 }
235 }
236 }
237
238 let mut locs = vec![];
239 let mut next_metavar = 0;
240 inner(matcher, &mut locs, &mut next_metavar, /* seq_depth */ 0);
241
242 // A final entry is needed for eof.
243 locs.push(MatcherLoc::Eof);
244
245 locs
246}
247
248/// A single matcher position, representing the state of matching.
249#[derive(Debug)]
250struct MatcherPos {
251 /// The index into `TtParser::locs`, which represents the "dot".
252 idx: usize,
253
254 /// The matches made against metavar decls so far. On a successful match, this vector ends up
255 /// with one element per metavar decl in the matcher. Each element records token trees matched
256 /// against the relevant metavar by the black box parser. An element will be a `MatchedSeq` if
257 /// the corresponding metavar decl is within a sequence.
258 ///
259 /// It is critical to performance that this is an `Rc`, because it gets cloned frequently when
260 /// processing sequences. Mostly for sequence-ending possibilities that must be tried but end
261 /// up failing.
262 matches: Rc<Vec<NamedMatch>>,
263}
264
265// This type is used a lot. Make sure it doesn't unintentionally get bigger.
266#[cfg(target_pointer_width = "64")]
267rustc_data_structures::static_assert_size!(MatcherPos, 16);
268
269impl MatcherPos {
270 /// Adds `m` as a named match for the `metavar_idx`-th metavar. There are only two call sites,
271 /// and both are hot enough to be always worth inlining.
272 #[inline(always)]
273 fn push_match(&mut self, metavar_idx: usize, seq_depth: usize, m: NamedMatch) {
274 let matches = Rc::make_mut(&mut self.matches);
275 match seq_depth {
276 0 => {
277 // We are not within a sequence. Just append `m`.
278 assert_eq!(metavar_idx, matches.len());
279 matches.push(m);
280 }
281 _ => {
282 // We are within a sequence. Find the final `MatchedSeq` at the appropriate depth
283 // and append `m` to its vector.
284 let mut curr = &mut matches[metavar_idx];
285 for _ in 0..seq_depth - 1 {
286 match curr {
287 MatchedSeq(seq) => curr = seq.last_mut().unwrap(),
288 _ => unreachable!(),
289 }
290 }
291 match curr {
292 MatchedSeq(seq) => seq.push(m),
293 _ => unreachable!(),
294 }
295 }
296 }
297 }
298}
299
300enum EofMatcherPositions {
301 None,
302 One(MatcherPos),
303 Multiple,
304}
305
306/// Represents the possible results of an attempted parse.
307pub(crate) enum ParseResult<T, F> {
308 /// Parsed successfully.
309 Success(T),
310 /// Arm failed to match. If the second parameter is `token::Eof`, it indicates an unexpected
311 /// end of macro invocation. Otherwise, it indicates that no rules expected the given token.
312 /// The usize is the approximate position of the token in the input token stream.
313 Failure(F),
314 /// Fatal error (malformed macro?). Abort compilation.
315 Error(rustc_span::Span, String),
316 ErrorReported(ErrorGuaranteed),
317}
318
319/// A `ParseResult` where the `Success` variant contains a mapping of
320/// `MacroRulesNormalizedIdent`s to `NamedMatch`es. This represents the mapping
321/// of metavars to the token trees they bind to.
322pub(crate) type NamedParseResult<F> = ParseResult<NamedMatches, F>;
323
324/// Contains a mapping of `MacroRulesNormalizedIdent`s to `NamedMatch`es.
325/// This represents the mapping of metavars to the token trees they bind to.
326pub(crate) type NamedMatches = FxHashMap<MacroRulesNormalizedIdent, NamedMatch>;
327
328/// Count how many metavars declarations are in `matcher`.
329pub(super) fn count_metavar_decls(matcher: &[TokenTree]) -> usize {
330 matcher
331 .iter()
332 .map(|tt| match tt {
333 TokenTree::MetaVarDecl(..) => 1,
334 TokenTree::Sequence(_, seq) => seq.num_captures,
335 TokenTree::Delimited(.., delim) => count_metavar_decls(&delim.tts),
336 TokenTree::Token(..) => 0,
337 TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
338 })
339 .sum()
340}
341
342/// `NamedMatch` is a pattern-match result for a single metavar. All
343/// `MatchedNonterminal`s in the `NamedMatch` have the same non-terminal type
344/// (expr, item, etc).
345///
346/// The in-memory structure of a particular `NamedMatch` represents the match
347/// that occurred when a particular subset of a matcher was applied to a
348/// particular token tree.
349///
350/// The width of each `MatchedSeq` in the `NamedMatch`, and the identity of
351/// the `MatchedNtNonTts`s, will depend on the token tree it was applied
352/// to: each `MatchedSeq` corresponds to a single repetition in the originating
353/// token tree. The depth of the `NamedMatch` structure will therefore depend
354/// only on the nesting depth of repetitions in the originating token tree it
355/// was derived from.
356///
357/// In layperson's terms: `NamedMatch` will form a tree representing nested matches of a particular
358/// meta variable. For example, if we are matching the following macro against the following
359/// invocation...
360///
361/// ```rust
362/// macro_rules! foo {
363/// ($($($x:ident),+);+) => {}
364/// }
365///
366/// foo!(a, b, c, d; a, b, c, d, e);
367/// ```
368///
369/// Then, the tree will have the following shape:
370///
371/// ```ignore (private-internal)
372/// # use NamedMatch::*;
373/// MatchedSeq([
374/// MatchedSeq([
375/// MatchedNonterminal(a),
376/// MatchedNonterminal(b),
377/// MatchedNonterminal(c),
378/// MatchedNonterminal(d),
379/// ]),
380/// MatchedSeq([
381/// MatchedNonterminal(a),
382/// MatchedNonterminal(b),
383/// MatchedNonterminal(c),
384/// MatchedNonterminal(d),
385/// MatchedNonterminal(e),
386/// ])
387/// ])
388/// ```
389#[derive(Debug, Clone)]
390pub(crate) enum NamedMatch {
391 MatchedSeq(Vec<NamedMatch>),
392 MatchedSingle(ParseNtResult),
393}
394
395/// Performs a token equality check, ignoring syntax context (that is, an unhygienic comparison)
396fn token_name_eq(t1: &Token, t2: &Token) -> bool {
397 if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = (t1.ident(), t2.ident()) {
398 ident1.name == ident2.name && is_raw1 == is_raw2
399 } else if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) =
400 (t1.lifetime(), t2.lifetime())
401 {
402 ident1.name == ident2.name && is_raw1 == is_raw2
403 } else {
404 t1.kind == t2.kind
405 }
406}
407
408// Note: the vectors could be created and dropped within `parse_tt`, but to avoid excess
409// allocations we have a single vector for each kind that is cleared and reused repeatedly.
410pub(crate) struct TtParser {
411 macro_name: Ident,
412
413 /// The set of current mps to be processed. This should be empty by the end of a successful
414 /// execution of `parse_tt_inner`.
415 cur_mps: Vec<MatcherPos>,
416
417 /// The set of newly generated mps. These are used to replenish `cur_mps` in the function
418 /// `parse_tt`.
419 next_mps: Vec<MatcherPos>,
420
421 /// The set of mps that are waiting for the black-box parser.
422 bb_mps: Vec<MatcherPos>,
423
424 /// Pre-allocate an empty match array, so it can be cloned cheaply for macros with many rules
425 /// that have no metavars.
426 empty_matches: Rc<Vec<NamedMatch>>,
427}
428
429impl TtParser {
430 pub(super) fn new(macro_name: Ident) -> TtParser {
431 TtParser {
432 macro_name,
433 cur_mps: vec![],
434 next_mps: vec![],
435 bb_mps: vec![],
436 empty_matches: Rc::new(vec![]),
437 }
438 }
439
440 pub(super) fn has_no_remaining_items_for_step(&self) -> bool {
441 self.cur_mps.is_empty()
442 }
443
444 /// Process the matcher positions of `cur_mps` until it is empty. In the process, this will
445 /// produce more mps in `next_mps` and `bb_mps`.
446 ///
447 /// # Returns
448 ///
449 /// `Some(result)` if everything is finished, `None` otherwise. Note that matches are kept
450 /// track of through the mps generated.
451 fn parse_tt_inner<'matcher, T: Tracker<'matcher>>(
452 &mut self,
453 matcher: &'matcher [MatcherLoc],
454 token: &Token,
455 approx_position: u32,
456 track: &mut T,
457 ) -> Option<NamedParseResult<T::Failure>> {
458 // Matcher positions that would be valid if the macro invocation was over now. Only
459 // modified if `token == Eof`.
460 let mut eof_mps = EofMatcherPositions::None;
461
462 while let Some(mut mp) = self.cur_mps.pop() {
463 let matcher_loc = &matcher[mp.idx];
464 track.before_match_loc(self, matcher_loc);
465
466 match matcher_loc {
467 MatcherLoc::Token { token: t } => {
468 // If it's a doc comment, we just ignore it and move on to the next tt in the
469 // matcher. This is a bug, but #95267 showed that existing programs rely on
470 // this behaviour, and changing it would require some care and a transition
471 // period.
472 //
473 // If the token matches, we can just advance the parser.
474 //
475 // Otherwise, this match has failed, there is nothing to do, and hopefully
476 // another mp in `cur_mps` will match.
477 if matches!(t, Token { kind: DocComment(..), .. }) {
478 mp.idx += 1;
479 self.cur_mps.push(mp);
480 } else if token_name_eq(t, token) {
481 mp.idx += 1;
482 self.next_mps.push(mp);
483 }
484 }
485 MatcherLoc::Delimited => {
486 // Entering the delimiter is trivial.
487 mp.idx += 1;
488 self.cur_mps.push(mp);
489 }
490 &MatcherLoc::Sequence {
491 op,
492 num_metavar_decls,
493 idx_first_after,
494 next_metavar,
495 seq_depth,
496 } => {
497 // Install an empty vec for each metavar within the sequence.
498 for metavar_idx in next_metavar..next_metavar + num_metavar_decls {
499 mp.push_match(metavar_idx, seq_depth, MatchedSeq(vec![]));
500 }
501
502 if matches!(op, KleeneOp::ZeroOrMore | KleeneOp::ZeroOrOne) {
503 // Try zero matches of this sequence, by skipping over it.
504 self.cur_mps.push(MatcherPos {
505 idx: idx_first_after,
506 matches: Rc::clone(&mp.matches),
507 });
508 }
509
510 // Try one or more matches of this sequence, by entering it.
511 mp.idx += 1;
512 self.cur_mps.push(mp);
513 }
514 &MatcherLoc::SequenceKleeneOpNoSep { op, idx_first } => {
515 // We are past the end of a sequence with no separator. Try ending the
516 // sequence. If that's not possible, `ending_mp` will fail quietly when it is
517 // processed next time around the loop.
518 let ending_mp = MatcherPos {
519 idx: mp.idx + 1, // +1 skips the Kleene op
520 matches: Rc::clone(&mp.matches),
521 };
522 self.cur_mps.push(ending_mp);
523
524 if op != KleeneOp::ZeroOrOne {
525 // Try another repetition.
526 mp.idx = idx_first;
527 self.cur_mps.push(mp);
528 }
529 }
530 MatcherLoc::SequenceSep { separator } => {
531 // We are past the end of a sequence with a separator but we haven't seen the
532 // separator yet. Try ending the sequence. If that's not possible, `ending_mp`
533 // will fail quietly when it is processed next time around the loop.
534 let ending_mp = MatcherPos {
535 idx: mp.idx + 2, // +2 skips the separator and the Kleene op
536 matches: Rc::clone(&mp.matches),
537 };
538 self.cur_mps.push(ending_mp);
539
540 if token_name_eq(token, separator) {
541 // The separator matches the current token. Advance past it.
542 mp.idx += 1;
543 self.next_mps.push(mp);
544 } else {
545 track.set_expected_token(separator);
546 }
547 }
548 &MatcherLoc::SequenceKleeneOpAfterSep { idx_first } => {
549 // We are past the sequence separator. This can't be a `?` Kleene op, because
550 // they don't permit separators. Try another repetition.
551 mp.idx = idx_first;
552 self.cur_mps.push(mp);
553 }
554 &MatcherLoc::MetaVarDecl { span, kind, .. } => {
555 // Built-in nonterminals never start with these tokens, so we can eliminate
556 // them from consideration. We use the span of the metavariable declaration
557 // to determine any edition-specific matching behavior for non-terminals.
558 if let Some(kind) = kind {
559 if Parser::nonterminal_may_begin_with(kind, token) {
560 self.bb_mps.push(mp);
561 }
562 } else {
563 // E.g. `$e` instead of `$e:expr`, reported as a hard error if actually used.
564 // Both this check and the one in `nameize` are necessary, surprisingly.
565 return Some(Error(span, "missing fragment specifier".to_string()));
566 }
567 }
568 MatcherLoc::Eof => {
569 // We are past the matcher's end, and not in a sequence. Try to end things.
570 debug_assert_eq!(mp.idx, matcher.len() - 1);
571 if *token == token::Eof {
572 eof_mps = match eof_mps {
573 EofMatcherPositions::None => EofMatcherPositions::One(mp),
574 EofMatcherPositions::One(_) | EofMatcherPositions::Multiple => {
575 EofMatcherPositions::Multiple
576 }
577 }
578 }
579 }
580 }
581 }
582
583 // If we reached the end of input, check that there is EXACTLY ONE possible matcher.
584 // Otherwise, either the parse is ambiguous (which is an error) or there is a syntax error.
585 if *token == token::Eof {
586 Some(match eof_mps {
587 EofMatcherPositions::One(mut eof_mp) => {
588 // Need to take ownership of the matches from within the `Rc`.
589 Rc::make_mut(&mut eof_mp.matches);
590 let matches = Rc::try_unwrap(eof_mp.matches).unwrap().into_iter();
591 self.nameize(matcher, matches)
592 }
593 EofMatcherPositions::Multiple => {
594 Error(token.span, "ambiguity: multiple successful parses".to_string())
595 }
596 EofMatcherPositions::None => Failure(T::build_failure(
597 Token::new(
598 token::Eof,
599 if token.span.is_dummy() { token.span } else { token.span.shrink_to_hi() },
600 ),
601 approx_position,
602 "missing tokens in macro arguments",
603 )),
604 })
605 } else {
606 None
607 }
608 }
609
610 /// Match the token stream from `parser` against `matcher`.
611 pub(super) fn parse_tt<'matcher, T: Tracker<'matcher>>(
612 &mut self,
613 parser: &mut Cow<'_, Parser<'_>>,
614 matcher: &'matcher [MatcherLoc],
615 track: &mut T,
616 ) -> NamedParseResult<T::Failure> {
617 // A queue of possible matcher positions. We initialize it with the matcher position in
618 // which the "dot" is before the first token of the first token tree in `matcher`.
619 // `parse_tt_inner` then processes all of these possible matcher positions and produces
620 // possible next positions into `next_mps`. After some post-processing, the contents of
621 // `next_mps` replenish `cur_mps` and we start over again.
622 self.cur_mps.clear();
623 self.cur_mps.push(MatcherPos { idx: 0, matches: Rc::clone(&self.empty_matches) });
624
625 loop {
626 self.next_mps.clear();
627 self.bb_mps.clear();
628
629 // Process `cur_mps` until either we have finished the input or we need to get some
630 // parsing from the black-box parser done.
631 let res = self.parse_tt_inner(
632 matcher,
633 &parser.token,
634 parser.approx_token_stream_pos(),
635 track,
636 );
637
638 if let Some(res) = res {
639 return res;
640 }
641
642 // `parse_tt_inner` handled all of `cur_mps`, so it's empty.
643 assert!(self.cur_mps.is_empty());
644
645 // Error messages here could be improved with links to original rules.
646 match (self.next_mps.len(), self.bb_mps.len()) {
647 (0, 0) => {
648 // There are no possible next positions AND we aren't waiting for the black-box
649 // parser: syntax error.
650 return Failure(T::build_failure(
651 parser.token.clone(),
652 parser.approx_token_stream_pos(),
653 "no rules expected this token in macro call",
654 ));
655 }
656
657 (_, 0) => {
658 // Dump all possible `next_mps` into `cur_mps` for the next iteration. Then
659 // process the next token.
660 self.cur_mps.append(&mut self.next_mps);
661 parser.to_mut().bump();
662 }
663
664 (0, 1) => {
665 // We need to call the black-box parser to get some nonterminal.
666 let mut mp = self.bb_mps.pop().unwrap();
667 let loc = &matcher[mp.idx];
668 if let &MatcherLoc::MetaVarDecl {
669 span,
670 kind: Some(kind),
671 next_metavar,
672 seq_depth,
673 ..
674 } = loc
675 {
676 // We use the span of the metavariable declaration to determine any
677 // edition-specific matching behavior for non-terminals.
678 let nt = match parser.to_mut().parse_nonterminal(kind) {
679 Err(err) => {
680 let guarantee = err.with_span_label(
681 span,
682 format!(
683 "while parsing argument for this `{kind}` macro fragment"
684 ),
685 )
686 .emit();
687 return ErrorReported(guarantee);
688 }
689 Ok(nt) => nt,
690 };
691 mp.push_match(next_metavar, seq_depth, MatchedSingle(nt));
692 mp.idx += 1;
693 } else {
694 unreachable!()
695 }
696 self.cur_mps.push(mp);
697 }
698
699 (_, _) => {
700 // Too many possibilities!
701 return self.ambiguity_error(matcher, parser.token.span);
702 }
703 }
704
705 assert!(!self.cur_mps.is_empty());
706 }
707 }
708
709 fn ambiguity_error<F>(
710 &self,
711 matcher: &[MatcherLoc],
712 token_span: rustc_span::Span,
713 ) -> NamedParseResult<F> {
714 let nts = self
715 .bb_mps
716 .iter()
717 .map(|mp| match &matcher[mp.idx] {
718 MatcherLoc::MetaVarDecl { bind, kind: Some(kind), .. } => {
719 format!("{kind} ('{bind}')")
720 }
721 _ => unreachable!(),
722 })
723 .collect::<Vec<String>>()
724 .join(" or ");
725
726 Error(
727 token_span,
728 format!(
729 "local ambiguity when calling macro `{}`: multiple parsing options: {}",
730 self.macro_name,
731 match self.next_mps.len() {
732 0 => format!("built-in NTs {nts}."),
733 n => format!("built-in NTs {nts} or {n} other option{s}.", s = pluralize!(n)),
734 }
735 ),
736 )
737 }
738
739 fn nameize<I: Iterator<Item = NamedMatch>, F>(
740 &self,
741 matcher: &[MatcherLoc],
742 mut res: I,
743 ) -> NamedParseResult<F> {
744 // Make that each metavar has _exactly one_ binding. If so, insert the binding into the
745 // `NamedParseResult`. Otherwise, it's an error.
746 let mut ret_val = FxHashMap::default();
747 for loc in matcher {
748 if let &MatcherLoc::MetaVarDecl { span, bind, kind, .. } = loc {
749 if kind.is_some() {
750 match ret_val.entry(MacroRulesNormalizedIdent::new(bind)) {
751 Vacant(spot) => spot.insert(res.next().unwrap()),
752 Occupied(..) => {
753 return Error(span, format!("duplicated bind name: {bind}"));
754 }
755 };
756 } else {
757 // E.g. `$e` instead of `$e:expr`, reported as a hard error if actually used.
758 // Both this check and the one in `parse_tt_inner` are necessary, surprisingly.
759 return Error(span, "missing fragment specifier".to_string());
760 }
761 }
762 }
763 Success(ret_val)
764 }
765}