// rustc_parse/lib.rs
1//! The main parser interface.
2
3// tidy-alphabetical-start
4#![cfg_attr(bootstrap, feature(assert_matches))]
5#![cfg_attr(test, feature(iter_order_by))]
6#![feature(box_patterns)]
7#![feature(debug_closure_helpers)]
8#![feature(default_field_values)]
9#![feature(iter_intersperse)]
10#![recursion_limit = "256"]
11// tidy-alphabetical-end
12
13use std::path::{Path, PathBuf};
14use std::str::Utf8Error;
15use std::sync::Arc;
16
17use rustc_ast as ast;
18use rustc_ast::token;
19use rustc_ast::tokenstream::TokenStream;
20use rustc_ast_pretty::pprust;
21use rustc_errors::{Diag, EmissionGuarantee, FatalError, PResult, pluralize};
22pub use rustc_lexer::UNICODE_VERSION;
23use rustc_session::parse::ParseSess;
24use rustc_span::source_map::SourceMap;
25use rustc_span::{FileName, SourceFile, Span};
26
/// Diagnostic label used when a parser is created over the tokens of macro arguments.
pub const MACRO_ARGUMENTS: Option<&str> = Some("macro arguments");
28
29#[macro_use]
30pub mod parser;
31use parser::Parser;
32
33use crate::lexer::StripTokens;
34
35pub mod lexer;
36
37mod errors;
38
39// Make sure that the Unicode version of the dependencies is the same.
40const _: () = {
41    let rustc_lexer = rustc_lexer::UNICODE_VERSION;
42    let rustc_span = rustc_span::UNICODE_VERSION;
43    let normalization = unicode_normalization::UNICODE_VERSION;
44    let width = unicode_width::UNICODE_VERSION;
45
46    if rustc_lexer.0 != rustc_span.0
47        || rustc_lexer.1 != rustc_span.1
48        || rustc_lexer.2 != rustc_span.2
49    {
50        {
    ::core::panicking::panic_fmt(format_args!("rustc_lexer and rustc_span must use the same Unicode version, `rustc_lexer::UNICODE_VERSION` and `rustc_span::UNICODE_VERSION` are different."));
};panic!(
51            "rustc_lexer and rustc_span must use the same Unicode version, \
52            `rustc_lexer::UNICODE_VERSION` and `rustc_span::UNICODE_VERSION` are \
53            different."
54        );
55    }
56
57    if rustc_lexer.0 != normalization.0
58        || rustc_lexer.1 != normalization.1
59        || rustc_lexer.2 != normalization.2
60    {
61        {
    ::core::panicking::panic_fmt(format_args!("rustc_lexer and unicode-normalization must use the same Unicode version, `rustc_lexer::UNICODE_VERSION` and `unicode_normalization::UNICODE_VERSION` are different."));
};panic!(
62            "rustc_lexer and unicode-normalization must use the same Unicode version, \
63            `rustc_lexer::UNICODE_VERSION` and `unicode_normalization::UNICODE_VERSION` are \
64            different."
65        );
66    }
67
68    if rustc_lexer.0 != width.0 || rustc_lexer.1 != width.1 || rustc_lexer.2 != width.2 {
69        {
    ::core::panicking::panic_fmt(format_args!("rustc_lexer and unicode-width must use the same Unicode version, `rustc_lexer::UNICODE_VERSION` and `unicode_width::UNICODE_VERSION` are different."));
};panic!(
70            "rustc_lexer and unicode-width must use the same Unicode version, \
71            `rustc_lexer::UNICODE_VERSION` and `unicode_width::UNICODE_VERSION` are \
72            different."
73        );
74    }
75};
76
77// Unwrap the result if `Ok`, otherwise emit the diagnostics and abort.
78pub fn unwrap_or_emit_fatal<T>(expr: Result<T, Vec<Diag<'_>>>) -> T {
79    match expr {
80        Ok(expr) => expr,
81        Err(errs) => {
82            for err in errs {
83                err.emit();
84            }
85            FatalError.raise()
86        }
87    }
88}
89
90/// Creates a new parser from a source string.
91///
92/// On failure, the errors must be consumed via `unwrap_or_emit_fatal`, `emit`, `cancel`,
93/// etc., otherwise a panic will occur when they are dropped.
94pub fn new_parser_from_source_str(
95    psess: &ParseSess,
96    name: FileName,
97    source: String,
98    strip_tokens: StripTokens,
99) -> Result<Parser<'_>, Vec<Diag<'_>>> {
100    let source_file = psess.source_map().new_source_file(name, source);
101    new_parser_from_source_file(psess, source_file, strip_tokens)
102}
103
104/// Creates a new parser from a filename. On failure, the errors must be consumed via
105/// `unwrap_or_emit_fatal`, `emit`, `cancel`, etc., otherwise a panic will occur when they are
106/// dropped.
107///
108/// If a span is given, that is used on an error as the source of the problem.
109pub fn new_parser_from_file<'a>(
110    psess: &'a ParseSess,
111    path: &Path,
112    strip_tokens: StripTokens,
113    sp: Option<Span>,
114) -> Result<Parser<'a>, Vec<Diag<'a>>> {
115    let sm = psess.source_map();
116    let source_file = sm.load_file(path).unwrap_or_else(|e| {
117        let msg = ::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("couldn\'t read `{0}`: {1}",
                path.display(), e))
    })format!("couldn't read `{}`: {}", path.display(), e);
118        let mut err = psess.dcx().struct_fatal(msg);
119        if let Ok(contents) = std::fs::read(path)
120            && let Err(utf8err) = std::str::from_utf8(&contents)
121        {
122            utf8_error(sm, &path.display().to_string(), sp, &mut err, utf8err, &contents);
123        }
124        if let Some(sp) = sp {
125            err.span(sp);
126        }
127        err.emit()
128    });
129    new_parser_from_source_file(psess, source_file, strip_tokens)
130}
131
132pub fn utf8_error<E: EmissionGuarantee>(
133    sm: &SourceMap,
134    path: &str,
135    sp: Option<Span>,
136    err: &mut Diag<'_, E>,
137    utf8err: Utf8Error,
138    contents: &[u8],
139) {
140    // The file exists, but it wasn't valid UTF-8.
141    let start = utf8err.valid_up_to();
142    let note = ::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("invalid utf-8 at byte `{0}`",
                start))
    })format!("invalid utf-8 at byte `{start}`");
143    let msg = if let Some(len) = utf8err.error_len() {
144        ::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("byte{1} `{0}` {2} not valid utf-8",
                if len == 1 {
                    ::alloc::__export::must_use({
                            ::alloc::fmt::format(format_args!("{0:?}", contents[start]))
                        })
                } else {
                    ::alloc::__export::must_use({
                            ::alloc::fmt::format(format_args!("{0:?}",
                                    &contents[start..start + len]))
                        })
                }, if len == 1 { "" } else { "s" },
                if len == 1 { "is" } else { "are" }))
    })format!(
145            "byte{s} `{bytes}` {are} not valid utf-8",
146            bytes = if len == 1 {
147                format!("{:?}", contents[start])
148            } else {
149                format!("{:?}", &contents[start..start + len])
150            },
151            s = pluralize!(len),
152            are = if len == 1 { "is" } else { "are" },
153        )
154    } else {
155        note.clone()
156    };
157    let contents = String::from_utf8_lossy(contents).to_string();
158
159    // We only emit this error for files in the current session
160    // so the working directory can only be the current working directory
161    let filename = FileName::Real(
162        sm.path_mapping().to_real_filename(sm.working_dir(), PathBuf::from(path).as_path()),
163    );
164    let source = sm.new_source_file(filename, contents);
165
166    // Avoid out-of-bounds span from lossy UTF-8 conversion.
167    if start as u32 > source.normalized_source_len.0 {
168        err.note(note);
169        return;
170    }
171
172    let span = Span::with_root_ctxt(
173        source.normalized_byte_pos(start as u32),
174        source.normalized_byte_pos(start as u32),
175    );
176    if span.is_dummy() {
177        err.note(note);
178    } else {
179        if sp.is_some() {
180            err.span_note(span, msg);
181        } else {
182            err.span(span);
183            err.span_label(span, msg);
184        }
185    }
186}
187
188/// Given a session and a `source_file`, return a parser. Returns any buffered errors from lexing
189/// the initial token stream.
190fn new_parser_from_source_file(
191    psess: &ParseSess,
192    source_file: Arc<SourceFile>,
193    strip_tokens: StripTokens,
194) -> Result<Parser<'_>, Vec<Diag<'_>>> {
195    let end_pos = source_file.end_position();
196    let stream = source_file_to_stream(psess, source_file, None, strip_tokens)?;
197    let mut parser = Parser::new(psess, stream, None);
198    if parser.token == token::Eof {
199        parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt(), None);
200    }
201    Ok(parser)
202}
203
204/// Given a source string, produces a sequence of token trees.
205///
206/// NOTE: This only strips shebangs, not frontmatter!
207pub fn source_str_to_stream(
208    psess: &ParseSess,
209    name: FileName,
210    source: String,
211    override_span: Option<Span>,
212) -> Result<TokenStream, Vec<Diag<'_>>> {
213    let source_file = psess.source_map().new_source_file(name, source);
214    // FIXME(frontmatter): Consider stripping frontmatter in a future edition. We can't strip them
215    // in the current edition since that would be breaking.
216    // See also <https://github.com/rust-lang/rust/issues/145520>.
217    // Alternatively, stop stripping shebangs here, too, if T-lang and crater approve.
218    source_file_to_stream(psess, source_file, override_span, StripTokens::Shebang)
219}
220
221/// Given a source file, produces a sequence of token trees.
222///
223/// Returns any buffered errors from parsing the token stream.
224fn source_file_to_stream<'psess>(
225    psess: &'psess ParseSess,
226    source_file: Arc<SourceFile>,
227    override_span: Option<Span>,
228    strip_tokens: StripTokens,
229) -> Result<TokenStream, Vec<Diag<'psess>>> {
230    let src = source_file.src.as_ref().unwrap_or_else(|| {
231        psess.dcx().bug(::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("cannot lex `source_file` without source: {0}",
                psess.source_map().filename_for_diagnostics(&source_file.name)))
    })format!(
232            "cannot lex `source_file` without source: {}",
233            psess.source_map().filename_for_diagnostics(&source_file.name)
234        ));
235    });
236
237    lexer::lex_token_trees(psess, src.as_str(), source_file.start_pos, override_span, strip_tokens)
238}
239
240/// Runs the given subparser `f` on the tokens of the given `attr`'s item.
241pub fn parse_in<'a, T>(
242    psess: &'a ParseSess,
243    tts: TokenStream,
244    name: &'static str,
245    mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
246) -> PResult<'a, T> {
247    let mut parser = Parser::new(psess, tts, Some(name));
248    let result = f(&mut parser)?;
249    if parser.token != token::Eof {
250        parser.unexpected()?;
251    }
252    Ok(result)
253}
254
255pub fn fake_token_stream_for_item(psess: &ParseSess, item: &ast::Item) -> TokenStream {
256    let source = pprust::item_to_string(item);
257    let filename = FileName::macro_expansion_source_code(&source);
258    unwrap_or_emit_fatal(source_str_to_stream(psess, filename, source, Some(item.span)))
259}
260
261pub fn fake_token_stream_for_foreign_item(
262    psess: &ParseSess,
263    item: &ast::ForeignItem,
264) -> TokenStream {
265    let source = pprust::foreign_item_to_string(item);
266    let filename = FileName::macro_expansion_source_code(&source);
267    unwrap_or_emit_fatal(source_str_to_stream(psess, filename, source, Some(item.span)))
268}
269
270pub fn fake_token_stream_for_crate(psess: &ParseSess, krate: &ast::Crate) -> TokenStream {
271    let source = pprust::crate_to_string_for_macros(krate);
272    let filename = FileName::macro_expansion_source_code(&source);
273    unwrap_or_emit_fatal(source_str_to_stream(
274        psess,
275        filename,
276        source,
277        Some(krate.spans.inner_span),
278    ))
279}