rustc_parse/lib.rs

1//! The main parser interface.
2
3// tidy-alphabetical-start
4#![allow(rustc::diagnostic_outside_of_impl)]
5#![allow(rustc::untranslatable_diagnostic)]
6#![feature(assert_matches)]
7#![feature(box_patterns)]
8#![feature(debug_closure_helpers)]
9#![feature(default_field_values)]
10#![feature(if_let_guard)]
11#![feature(iter_intersperse)]
12#![feature(iter_order_by)]
13#![recursion_limit = "256"]
14// tidy-alphabetical-end
15
16use std::path::{Path, PathBuf};
17use std::str::Utf8Error;
18use std::sync::Arc;
19
20use rustc_ast as ast;
21use rustc_ast::token;
22use rustc_ast::tokenstream::TokenStream;
23use rustc_ast_pretty::pprust;
24use rustc_errors::{Diag, EmissionGuarantee, FatalError, PResult, pluralize};
25pub use rustc_lexer::UNICODE_VERSION;
26use rustc_session::parse::ParseSess;
27use rustc_span::source_map::SourceMap;
28use rustc_span::{FileName, SourceFile, Span};
29
// NOTE(review): presumably passed as the subparser-name argument of `Parser::new`
// (cf. `parse_in`'s `Some(name)`) when the tokens come from macro arguments —
// confirm against callers in other crates.
pub const MACRO_ARGUMENTS: Option<&str> = Some("macro arguments");
31
32#[macro_use]
33pub mod parser;
34use parser::Parser;
35
36use crate::lexer::StripTokens;
37
38pub mod lexer;
39
40mod errors;
41
42// Make sure that the Unicode version of the dependencies is the same.
43const _: () = {
44    let rustc_lexer = rustc_lexer::UNICODE_VERSION;
45    let rustc_span = rustc_span::UNICODE_VERSION;
46    let normalization = unicode_normalization::UNICODE_VERSION;
47    let width = unicode_width::UNICODE_VERSION;
48
49    if rustc_lexer.0 != rustc_span.0
50        || rustc_lexer.1 != rustc_span.1
51        || rustc_lexer.2 != rustc_span.2
52    {
53        panic!(
54            "rustc_lexer and rustc_span must use the same Unicode version, \
55            `rustc_lexer::UNICODE_VERSION` and `rustc_span::UNICODE_VERSION` are \
56            different."
57        );
58    }
59
60    if rustc_lexer.0 != normalization.0
61        || rustc_lexer.1 != normalization.1
62        || rustc_lexer.2 != normalization.2
63    {
64        panic!(
65            "rustc_lexer and unicode-normalization must use the same Unicode version, \
66            `rustc_lexer::UNICODE_VERSION` and `unicode_normalization::UNICODE_VERSION` are \
67            different."
68        );
69    }
70
71    if rustc_lexer.0 != width.0 || rustc_lexer.1 != width.1 || rustc_lexer.2 != width.2 {
72        panic!(
73            "rustc_lexer and unicode-width must use the same Unicode version, \
74            `rustc_lexer::UNICODE_VERSION` and `unicode_width::UNICODE_VERSION` are \
75            different."
76        );
77    }
78};
79
// Generates this crate's diagnostic-message bindings from the Fluent resource file.
rustc_fluent_macro::fluent_messages! { "../messages.ftl" }
81
82// Unwrap the result if `Ok`, otherwise emit the diagnostics and abort.
83pub fn unwrap_or_emit_fatal<T>(expr: Result<T, Vec<Diag<'_>>>) -> T {
84    match expr {
85        Ok(expr) => expr,
86        Err(errs) => {
87            for err in errs {
88                err.emit();
89            }
90            FatalError.raise()
91        }
92    }
93}
94
95/// Creates a new parser from a source string.
96///
97/// On failure, the errors must be consumed via `unwrap_or_emit_fatal`, `emit`, `cancel`,
98/// etc., otherwise a panic will occur when they are dropped.
99pub fn new_parser_from_source_str(
100    psess: &ParseSess,
101    name: FileName,
102    source: String,
103    strip_tokens: StripTokens,
104) -> Result<Parser<'_>, Vec<Diag<'_>>> {
105    let source_file = psess.source_map().new_source_file(name, source);
106    new_parser_from_source_file(psess, source_file, strip_tokens)
107}
108
/// Creates a new parser from a filename. On failure, the errors must be consumed via
/// `unwrap_or_emit_fatal`, `emit`, `cancel`, etc., otherwise a panic will occur when they are
/// dropped.
///
/// If a span is given, that is used on an error as the source of the problem.
pub fn new_parser_from_file<'a>(
    psess: &'a ParseSess,
    path: &Path,
    strip_tokens: StripTokens,
    sp: Option<Span>,
) -> Result<Parser<'a>, Vec<Diag<'a>>> {
    let sm = psess.source_map();
    let source_file = sm.load_file(path).unwrap_or_else(|e| {
        let msg = format!("couldn't read `{}`: {}", path.display(), e);
        let mut err = psess.dcx().struct_fatal(msg);
        // If the file exists on disk but is not valid UTF-8, re-read the raw bytes
        // so the diagnostic can point at the exact offending byte(s).
        if let Ok(contents) = std::fs::read(path)
            && let Err(utf8err) = String::from_utf8(contents.clone())
        {
            utf8_error(
                sm,
                &path.display().to_string(),
                sp,
                &mut err,
                utf8err.utf8_error(),
                &contents,
            );
        }
        // Attribute the failure to the span that requested the file, if any.
        if let Some(sp) = sp {
            err.span(sp);
        }
        // Emitting the fatal diagnostic diverges, which is what lets this closure
        // satisfy `unwrap_or_else`'s requirement to produce a `SourceFile`.
        err.emit();
    });
    new_parser_from_source_file(psess, source_file, strip_tokens)
}
143
/// Annotates `err` with details about invalid UTF-8 in the file at `path`.
///
/// `utf8err` describes the first decoding failure within `contents` (the raw
/// file bytes); `sp`, if present, is the span that caused the file to be read.
/// A lossy-decoded copy of the file is registered in `sm` so the note/label can
/// carry a real span into the file.
pub fn utf8_error<E: EmissionGuarantee>(
    sm: &SourceMap,
    path: &str,
    sp: Option<Span>,
    err: &mut Diag<'_, E>,
    utf8err: Utf8Error,
    contents: &[u8],
) {
    // The file exists, but it wasn't valid UTF-8.
    let start = utf8err.valid_up_to();
    let note = format!("invalid utf-8 at byte `{start}`");
    // When the error length is known, show the offending byte(s) themselves;
    // `error_len() == None` means the input ended unexpectedly mid-sequence, so
    // fall back to the generic note.
    let msg = if let Some(len) = utf8err.error_len() {
        format!(
            "byte{s} `{bytes}` {are} not valid utf-8",
            bytes = if len == 1 {
                format!("{:?}", contents[start])
            } else {
                format!("{:?}", &contents[start..start + len])
            },
            s = pluralize!(len),
            are = if len == 1 { "is" } else { "are" },
        )
    } else {
        note.clone()
    };
    // Register a lossy-decoded copy so the diagnostic can point into the file.
    let contents = String::from_utf8_lossy(contents).to_string();

    // We only emit this error for files in the current session
    // so the working directory can only be the current working directory
    let filename = FileName::Real(
        sm.path_mapping().to_real_filename(sm.working_dir(), PathBuf::from(path).as_path()),
    );
    let source = sm.new_source_file(filename, contents);

    // Avoid out-of-bounds span from lossy UTF-8 conversion.
    if start as u32 > source.normalized_source_len.0 {
        err.note(note);
        return;
    }

    // Zero-width span at the first invalid byte (positions normalized to the
    // lossy copy registered above).
    let span = Span::with_root_ctxt(
        source.normalized_byte_pos(start as u32),
        source.normalized_byte_pos(start as u32),
    );
    if span.is_dummy() {
        err.note(note);
    } else {
        if sp.is_some() {
            // The caller already set a primary span; attach details as a note.
            err.span_note(span, msg);
        } else {
            err.span(span);
            err.span_label(span, msg);
        }
    }
}
199
200/// Given a session and a `source_file`, return a parser. Returns any buffered errors from lexing
201/// the initial token stream.
202fn new_parser_from_source_file(
203    psess: &ParseSess,
204    source_file: Arc<SourceFile>,
205    strip_tokens: StripTokens,
206) -> Result<Parser<'_>, Vec<Diag<'_>>> {
207    let end_pos = source_file.end_position();
208    let stream = source_file_to_stream(psess, source_file, None, strip_tokens)?;
209    let mut parser = Parser::new(psess, stream, None);
210    if parser.token == token::Eof {
211        parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt(), None);
212    }
213    Ok(parser)
214}
215
216/// Given a source string, produces a sequence of token trees.
217///
218/// NOTE: This only strips shebangs, not frontmatter!
219pub fn source_str_to_stream(
220    psess: &ParseSess,
221    name: FileName,
222    source: String,
223    override_span: Option<Span>,
224) -> Result<TokenStream, Vec<Diag<'_>>> {
225    let source_file = psess.source_map().new_source_file(name, source);
226    // FIXME(frontmatter): Consider stripping frontmatter in a future edition. We can't strip them
227    // in the current edition since that would be breaking.
228    // See also <https://github.com/rust-lang/rust/issues/145520>.
229    // Alternatively, stop stripping shebangs here, too, if T-lang and crater approve.
230    source_file_to_stream(psess, source_file, override_span, StripTokens::Shebang)
231}
232
233/// Given a source file, produces a sequence of token trees.
234///
235/// Returns any buffered errors from parsing the token stream.
236fn source_file_to_stream<'psess>(
237    psess: &'psess ParseSess,
238    source_file: Arc<SourceFile>,
239    override_span: Option<Span>,
240    strip_tokens: StripTokens,
241) -> Result<TokenStream, Vec<Diag<'psess>>> {
242    let src = source_file.src.as_ref().unwrap_or_else(|| {
243        psess.dcx().bug(format!(
244            "cannot lex `source_file` without source: {}",
245            psess.source_map().filename_for_diagnostics(&source_file.name)
246        ));
247    });
248
249    lexer::lex_token_trees(psess, src.as_str(), source_file.start_pos, override_span, strip_tokens)
250}
251
252/// Runs the given subparser `f` on the tokens of the given `attr`'s item.
253pub fn parse_in<'a, T>(
254    psess: &'a ParseSess,
255    tts: TokenStream,
256    name: &'static str,
257    mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
258) -> PResult<'a, T> {
259    let mut parser = Parser::new(psess, tts, Some(name));
260    let result = f(&mut parser)?;
261    if parser.token != token::Eof {
262        parser.unexpected()?;
263    }
264    Ok(result)
265}
266
267pub fn fake_token_stream_for_item(psess: &ParseSess, item: &ast::Item) -> TokenStream {
268    let source = pprust::item_to_string(item);
269    let filename = FileName::macro_expansion_source_code(&source);
270    unwrap_or_emit_fatal(source_str_to_stream(psess, filename, source, Some(item.span)))
271}
272
273pub fn fake_token_stream_for_crate(psess: &ParseSess, krate: &ast::Crate) -> TokenStream {
274    let source = pprust::crate_to_string_for_macros(krate);
275    let filename = FileName::macro_expansion_source_code(&source);
276    unwrap_or_emit_fatal(source_str_to_stream(
277        psess,
278        filename,
279        source,
280        Some(krate.spans.inner_span),
281    ))
282}