rustc_parse/lexer/
tokentrees.rs

1use rustc_ast::token::{self, Delimiter, Token};
2use rustc_ast::tokenstream::{DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree};
3use rustc_ast_pretty::pprust::token_to_string;
4use rustc_errors::Diag;
5
6use super::diagnostics::{report_suspicious_mismatch_block, same_indentation_level};
7use super::{Lexer, UnmatchedDelim};
8
9impl<'psess, 'src> Lexer<'psess, 'src> {
10    // Lex into a token stream. The `Spacing` in the result is that of the
11    // opening delimiter.
12    pub(super) fn lex_token_trees(
13        &mut self,
14        is_delimited: bool,
15    ) -> Result<(Spacing, TokenStream), Vec<Diag<'psess>>> {
16        // Move past the opening delimiter.
17        let open_spacing = self.bump_minimal();
18
19        let mut buf = Vec::new();
20        loop {
21            match self.token.kind {
22                token::OpenDelim(delim) => {
23                    // Invisible delimiters cannot occur here because `TokenTreesReader` parses
24                    // code directly from strings, with no macro expansion involved.
25                    debug_assert!(!matches!(delim, Delimiter::Invisible(_)));
26                    buf.push(match self.lex_token_tree_open_delim(delim) {
27                        Ok(val) => val,
28                        Err(errs) => return Err(errs),
29                    })
30                }
31                token::CloseDelim(delim) => {
32                    // Invisible delimiters cannot occur here because `TokenTreesReader` parses
33                    // code directly from strings, with no macro expansion involved.
34                    debug_assert!(!matches!(delim, Delimiter::Invisible(_)));
35                    return if is_delimited {
36                        Ok((open_spacing, TokenStream::new(buf)))
37                    } else {
38                        Err(vec![self.close_delim_err(delim)])
39                    };
40                }
41                token::Eof => {
42                    return if is_delimited {
43                        Err(vec![self.eof_err()])
44                    } else {
45                        Ok((open_spacing, TokenStream::new(buf)))
46                    };
47                }
48                _ => {
49                    // Get the next normal token.
50                    let (this_tok, this_spacing) = self.bump();
51                    buf.push(TokenTree::Token(this_tok, this_spacing));
52                }
53            }
54        }
55    }
56
57    fn eof_err(&mut self) -> Diag<'psess> {
58        let msg = "this file contains an unclosed delimiter";
59        let mut err = self.dcx().struct_span_err(self.token.span, msg);
60
61        let unclosed_delimiter_show_limit = 5;
62        let len = usize::min(unclosed_delimiter_show_limit, self.diag_info.open_braces.len());
63        for &(_, span) in &self.diag_info.open_braces[..len] {
64            err.span_label(span, "unclosed delimiter");
65            self.diag_info.unmatched_delims.push(UnmatchedDelim {
66                found_delim: None,
67                found_span: self.token.span,
68                unclosed_span: Some(span),
69                candidate_span: None,
70            });
71        }
72
73        if let Some((_, span)) = self.diag_info.open_braces.get(unclosed_delimiter_show_limit)
74            && self.diag_info.open_braces.len() >= unclosed_delimiter_show_limit + 2
75        {
76            err.span_label(
77                *span,
78                format!(
79                    "another {} unclosed delimiters begin from here",
80                    self.diag_info.open_braces.len() - unclosed_delimiter_show_limit
81                ),
82            );
83        }
84
85        if let Some((delim, _)) = self.diag_info.open_braces.last() {
86            report_suspicious_mismatch_block(
87                &mut err,
88                &self.diag_info,
89                self.psess.source_map(),
90                *delim,
91            )
92        }
93        err
94    }
95
96    fn lex_token_tree_open_delim(
97        &mut self,
98        open_delim: Delimiter,
99    ) -> Result<TokenTree, Vec<Diag<'psess>>> {
100        // The span for beginning of the delimited section.
101        let pre_span = self.token.span;
102
103        self.diag_info.open_braces.push((open_delim, self.token.span));
104
105        // Lex the token trees within the delimiters.
106        // We stop at any delimiter so we can try to recover if the user
107        // uses an incorrect delimiter.
108        let (open_spacing, tts) = self.lex_token_trees(/* is_delimited */ true)?;
109
110        // Expand to cover the entire delimited token tree.
111        let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
112        let sm = self.psess.source_map();
113
114        let close_spacing = match self.token.kind {
115            // Correct delimiter.
116            token::CloseDelim(close_delim) if close_delim == open_delim => {
117                let (open_brace, open_brace_span) = self.diag_info.open_braces.pop().unwrap();
118                let close_brace_span = self.token.span;
119
120                if tts.is_empty() && close_delim == Delimiter::Brace {
121                    let empty_block_span = open_brace_span.to(close_brace_span);
122                    if !sm.is_multiline(empty_block_span) {
123                        // Only track if the block is in the form of `{}`, otherwise it is
124                        // likely that it was written on purpose.
125                        self.diag_info.empty_block_spans.push(empty_block_span);
126                    }
127                }
128
129                // only add braces
130                if let (Delimiter::Brace, Delimiter::Brace) = (open_brace, open_delim) {
131                    // Add all the matching spans, we will sort by span later
132                    self.diag_info.matching_block_spans.push((open_brace_span, close_brace_span));
133                }
134
135                // Move past the closing delimiter.
136                self.bump_minimal()
137            }
138            // Incorrect delimiter.
139            token::CloseDelim(close_delim) => {
140                let mut unclosed_delimiter = None;
141                let mut candidate = None;
142
143                if self.diag_info.last_unclosed_found_span != Some(self.token.span) {
144                    // do not complain about the same unclosed delimiter multiple times
145                    self.diag_info.last_unclosed_found_span = Some(self.token.span);
146                    // This is a conservative error: only report the last unclosed
147                    // delimiter. The previous unclosed delimiters could actually be
148                    // closed! The lexer just hasn't gotten to them yet.
149                    if let Some(&(_, sp)) = self.diag_info.open_braces.last() {
150                        unclosed_delimiter = Some(sp);
151                    };
152                    for (brace, brace_span) in &self.diag_info.open_braces {
153                        if same_indentation_level(sm, self.token.span, *brace_span)
154                            && brace == &close_delim
155                        {
156                            // high likelihood of these two corresponding
157                            candidate = Some(*brace_span);
158                        }
159                    }
160                    let (_, _) = self.diag_info.open_braces.pop().unwrap();
161                    self.diag_info.unmatched_delims.push(UnmatchedDelim {
162                        found_delim: Some(close_delim),
163                        found_span: self.token.span,
164                        unclosed_span: unclosed_delimiter,
165                        candidate_span: candidate,
166                    });
167                } else {
168                    self.diag_info.open_braces.pop();
169                }
170
171                // If the incorrect delimiter matches an earlier opening
172                // delimiter, then don't consume it (it can be used to
173                // close the earlier one). Otherwise, consume it.
174                // E.g., we try to recover from:
175                // fn foo() {
176                //     bar(baz(
177                // }  // Incorrect delimiter but matches the earlier `{`
178                if !self.diag_info.open_braces.iter().any(|&(b, _)| b == close_delim) {
179                    self.bump_minimal()
180                } else {
181                    // The choice of value here doesn't matter.
182                    Spacing::Alone
183                }
184            }
185            token::Eof => {
186                // Silently recover, the EOF token will be seen again
187                // and an error emitted then. Thus we don't pop from
188                // self.open_braces here. The choice of spacing value here
189                // doesn't matter.
190                Spacing::Alone
191            }
192            _ => unreachable!(),
193        };
194
195        let spacing = DelimSpacing::new(open_spacing, close_spacing);
196
197        Ok(TokenTree::Delimited(delim_span, spacing, open_delim, tts))
198    }
199
200    // Move on to the next token, returning the current token and its spacing.
201    // Will glue adjacent single-char tokens together.
202    fn bump(&mut self) -> (Token, Spacing) {
203        let (this_spacing, next_tok) = loop {
204            let (next_tok, is_next_tok_preceded_by_whitespace) = self.next_token_from_cursor();
205
206            if is_next_tok_preceded_by_whitespace {
207                break (Spacing::Alone, next_tok);
208            } else if let Some(glued) = self.token.glue(&next_tok) {
209                self.token = glued;
210            } else {
211                let this_spacing = if next_tok.is_punct() {
212                    Spacing::Joint
213                } else if next_tok == token::Eof {
214                    Spacing::Alone
215                } else {
216                    Spacing::JointHidden
217                };
218                break (this_spacing, next_tok);
219            }
220        };
221        let this_tok = std::mem::replace(&mut self.token, next_tok);
222        (this_tok, this_spacing)
223    }
224
225    // Cut-down version of `bump` used when the token kind is known in advance.
226    fn bump_minimal(&mut self) -> Spacing {
227        let (next_tok, is_next_tok_preceded_by_whitespace) = self.next_token_from_cursor();
228
229        let this_spacing = if is_next_tok_preceded_by_whitespace {
230            Spacing::Alone
231        } else {
232            if next_tok.is_punct() {
233                Spacing::Joint
234            } else if next_tok == token::Eof {
235                Spacing::Alone
236            } else {
237                Spacing::JointHidden
238            }
239        };
240
241        self.token = next_tok;
242        this_spacing
243    }
244
245    fn close_delim_err(&mut self, delim: Delimiter) -> Diag<'psess> {
246        // An unexpected closing delimiter (i.e., there is no matching opening delimiter).
247        let token_str = token_to_string(&self.token);
248        let msg = format!("unexpected closing delimiter: `{token_str}`");
249        let mut err = self.dcx().struct_span_err(self.token.span, msg);
250
251        report_suspicious_mismatch_block(&mut err, &self.diag_info, self.psess.source_map(), delim);
252        err.span_label(self.token.span, "unexpected closing delimiter");
253        err
254    }
255}