// rustc_parse/lexer/tokentrees.rs

1use rustc_ast::token::{self, Delimiter, Token};
2use rustc_ast::tokenstream::{DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree};
3use rustc_ast_pretty::pprust::token_to_string;
4use rustc_errors::Diag;
5
6use super::diagnostics::{report_suspicious_mismatch_block, same_indentation_level};
7use super::{Lexer, UnmatchedDelim};
8
9impl<'psess, 'src> Lexer<'psess, 'src> {
10    // Lex into a token stream. The `Spacing` in the result is that of the
11    // opening delimiter.
12    pub(super) fn lex_token_trees(
13        &mut self,
14        is_delimited: bool,
15    ) -> Result<(Spacing, TokenStream), Vec<Diag<'psess>>> {
16        // Move past the opening delimiter.
17        let open_spacing = self.bump_minimal();
18
19        let mut buf = Vec::new();
20        loop {
21            if let Some(delim) = self.token.kind.open_delim() {
22                // Invisible delimiters cannot occur here because `TokenTreesReader` parses
23                // code directly from strings, with no macro expansion involved.
24                debug_assert!(!matches!(delim, Delimiter::Invisible(_)));
25                buf.push(match self.lex_token_tree_open_delim(delim) {
26                    Ok(val) => val,
27                    Err(errs) => return Err(errs),
28                })
29            } else if let Some(delim) = self.token.kind.close_delim() {
30                // Invisible delimiters cannot occur here because `TokenTreesReader` parses
31                // code directly from strings, with no macro expansion involved.
32                debug_assert!(!matches!(delim, Delimiter::Invisible(_)));
33                return if is_delimited {
34                    Ok((open_spacing, TokenStream::new(buf)))
35                } else {
36                    Err(vec![self.close_delim_err(delim)])
37                };
38            } else if self.token.kind == token::Eof {
39                return if is_delimited {
40                    Err(vec![self.eof_err()])
41                } else {
42                    Ok((open_spacing, TokenStream::new(buf)))
43                };
44            } else {
45                // Get the next normal token.
46                let (this_tok, this_spacing) = self.bump();
47                buf.push(TokenTree::Token(this_tok, this_spacing));
48            }
49        }
50    }
51
52    fn eof_err(&mut self) -> Diag<'psess> {
53        let msg = "this file contains an unclosed delimiter";
54        let mut err = self.dcx().struct_span_err(self.token.span, msg);
55
56        let unclosed_delimiter_show_limit = 5;
57        let len = usize::min(unclosed_delimiter_show_limit, self.diag_info.open_delimiters.len());
58        for &(_, span) in &self.diag_info.open_delimiters[..len] {
59            err.span_label(span, "unclosed delimiter");
60            self.diag_info.unmatched_delims.push(UnmatchedDelim {
61                found_delim: None,
62                found_span: self.token.span,
63                unclosed_span: Some(span),
64                candidate_span: None,
65            });
66        }
67
68        if let Some((_, span)) = self.diag_info.open_delimiters.get(unclosed_delimiter_show_limit)
69            && self.diag_info.open_delimiters.len() >= unclosed_delimiter_show_limit + 2
70        {
71            err.span_label(
72                *span,
73                format!(
74                    "another {} unclosed delimiters begin from here",
75                    self.diag_info.open_delimiters.len() - unclosed_delimiter_show_limit
76                ),
77            );
78        }
79
80        if let Some((delim, _)) = self.diag_info.open_delimiters.last() {
81            report_suspicious_mismatch_block(
82                &mut err,
83                &self.diag_info,
84                self.psess.source_map(),
85                *delim,
86            )
87        }
88        err
89    }
90
91    fn lex_token_tree_open_delim(
92        &mut self,
93        open_delim: Delimiter,
94    ) -> Result<TokenTree, Vec<Diag<'psess>>> {
95        // The span for beginning of the delimited section.
96        let pre_span = self.token.span;
97
98        self.diag_info.open_delimiters.push((open_delim, self.token.span));
99
100        // Lex the token trees within the delimiters.
101        // We stop at any delimiter so we can try to recover if the user
102        // uses an incorrect delimiter.
103        let (open_spacing, tts) = self.lex_token_trees(/* is_delimited */ true)?;
104
105        // Expand to cover the entire delimited token tree.
106        let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
107        let sm = self.psess.source_map();
108
109        let close_spacing = if let Some(close_delim) = self.token.kind.close_delim() {
110            if close_delim == open_delim {
111                // Correct delimiter.
112                let (open_delimiter, open_delimiter_span) =
113                    self.diag_info.open_delimiters.pop().unwrap();
114                let close_delimiter_span = self.token.span;
115
116                if tts.is_empty() && close_delim == Delimiter::Brace {
117                    let empty_block_span = open_delimiter_span.to(close_delimiter_span);
118                    if !sm.is_multiline(empty_block_span) {
119                        // Only track if the block is in the form of `{}`, otherwise it is
120                        // likely that it was written on purpose.
121                        self.diag_info.empty_block_spans.push(empty_block_span);
122                    }
123                }
124
125                // only add braces
126                if let (Delimiter::Brace, Delimiter::Brace) = (open_delimiter, open_delim) {
127                    // Add all the matching spans, we will sort by span later
128                    self.diag_info
129                        .matching_block_spans
130                        .push((open_delimiter_span, close_delimiter_span));
131                }
132
133                // Move past the closing delimiter.
134                self.bump_minimal()
135            } else {
136                // Incorrect delimiter.
137                let mut unclosed_delimiter = None;
138                let mut candidate = None;
139
140                if self.diag_info.last_unclosed_found_span != Some(self.token.span) {
141                    // do not complain about the same unclosed delimiter multiple times
142                    self.diag_info.last_unclosed_found_span = Some(self.token.span);
143                    // This is a conservative error: only report the last unclosed
144                    // delimiter. The previous unclosed delimiters could actually be
145                    // closed! The lexer just hasn't gotten to them yet.
146                    if let Some(&(_, sp)) = self.diag_info.open_delimiters.last() {
147                        unclosed_delimiter = Some(sp);
148                    };
149                    for (delimiter, delimiter_span) in &self.diag_info.open_delimiters {
150                        if same_indentation_level(sm, self.token.span, *delimiter_span)
151                            && delimiter == &close_delim
152                        {
153                            // high likelihood of these two corresponding
154                            candidate = Some(*delimiter_span);
155                        }
156                    }
157                    let (_, _) = self.diag_info.open_delimiters.pop().unwrap();
158                    self.diag_info.unmatched_delims.push(UnmatchedDelim {
159                        found_delim: Some(close_delim),
160                        found_span: self.token.span,
161                        unclosed_span: unclosed_delimiter,
162                        candidate_span: candidate,
163                    });
164                } else {
165                    self.diag_info.open_delimiters.pop();
166                }
167
168                // If the incorrect delimiter matches an earlier opening
169                // delimiter, then don't consume it (it can be used to
170                // close the earlier one). Otherwise, consume it.
171                // E.g., we try to recover from:
172                // fn foo() {
173                //     bar(baz(
174                // }  // Incorrect delimiter but matches the earlier `{`
175                if !self.diag_info.open_delimiters.iter().any(|&(d, _)| d == close_delim) {
176                    self.bump_minimal()
177                } else {
178                    // The choice of value here doesn't matter.
179                    Spacing::Alone
180                }
181            }
182        } else {
183            assert_eq!(self.token.kind, token::Eof);
184            // Silently recover, the EOF token will be seen again
185            // and an error emitted then. Thus we don't pop from
186            // self.open_delimiters here. The choice of spacing value here
187            // doesn't matter.
188            Spacing::Alone
189        };
190
191        let spacing = DelimSpacing::new(open_spacing, close_spacing);
192
193        Ok(TokenTree::Delimited(delim_span, spacing, open_delim, tts))
194    }
195
196    // Move on to the next token, returning the current token and its spacing.
197    // Will glue adjacent single-char tokens together.
198    fn bump(&mut self) -> (Token, Spacing) {
199        let (this_spacing, next_tok) = loop {
200            let (next_tok, is_next_tok_preceded_by_whitespace) = self.next_token_from_cursor();
201
202            if is_next_tok_preceded_by_whitespace {
203                break (Spacing::Alone, next_tok);
204            } else if let Some(glued) = self.token.glue(&next_tok) {
205                self.token = glued;
206            } else {
207                let this_spacing = if next_tok.is_punct() {
208                    Spacing::Joint
209                } else if next_tok == token::Eof {
210                    Spacing::Alone
211                } else {
212                    Spacing::JointHidden
213                };
214                break (this_spacing, next_tok);
215            }
216        };
217        let this_tok = std::mem::replace(&mut self.token, next_tok);
218        (this_tok, this_spacing)
219    }
220
221    // Cut-down version of `bump` used when the token kind is known in advance.
222    fn bump_minimal(&mut self) -> Spacing {
223        let (next_tok, is_next_tok_preceded_by_whitespace) = self.next_token_from_cursor();
224
225        let this_spacing = if is_next_tok_preceded_by_whitespace {
226            Spacing::Alone
227        } else {
228            if next_tok.is_punct() {
229                Spacing::Joint
230            } else if next_tok == token::Eof {
231                Spacing::Alone
232            } else {
233                Spacing::JointHidden
234            }
235        };
236
237        self.token = next_tok;
238        this_spacing
239    }
240
241    fn close_delim_err(&mut self, delim: Delimiter) -> Diag<'psess> {
242        // An unexpected closing delimiter (i.e., there is no matching opening delimiter).
243        let token_str = token_to_string(&self.token);
244        let msg = format!("unexpected closing delimiter: `{token_str}`");
245        let mut err = self.dcx().struct_span_err(self.token.span, msg);
246
247        report_suspicious_mismatch_block(&mut err, &self.diag_info, self.psess.source_map(), delim);
248        err.span_label(self.token.span, "unexpected closing delimiter");
249        err
250    }
251}