// compiler/rustc_parse/src/lib.rs
1//! The main parser interface.
2
3// tidy-alphabetical-start
4#![feature(assert_matches)]
5#![feature(box_patterns)]
6#![feature(debug_closure_helpers)]
7#![feature(default_field_values)]
8#![feature(if_let_guard)]
9#![feature(iter_intersperse)]
10#![feature(iter_order_by)]
11#![recursion_limit = "256"]
12// tidy-alphabetical-end
13
14use std::path::{Path, PathBuf};
15use std::str::Utf8Error;
16use std::sync::Arc;
17
18use rustc_ast as ast;
19use rustc_ast::token;
20use rustc_ast::tokenstream::TokenStream;
21use rustc_ast_pretty::pprust;
22use rustc_errors::{Diag, EmissionGuarantee, FatalError, PResult, pluralize};
23pub use rustc_lexer::UNICODE_VERSION;
24use rustc_session::parse::ParseSess;
25use rustc_span::source_map::SourceMap;
26use rustc_span::{FileName, SourceFile, Span};
27
28pub const MACRO_ARGUMENTS: Option<&str> = Some("macro arguments");
29
30#[macro_use]
31pub mod parser;
32use parser::Parser;
33
34use crate::lexer::StripTokens;
35
36pub mod lexer;
37
38mod errors;
39
40// Make sure that the Unicode version of the dependencies is the same.
41const _: () = {
42    let rustc_lexer = rustc_lexer::UNICODE_VERSION;
43    let rustc_span = rustc_span::UNICODE_VERSION;
44    let normalization = unicode_normalization::UNICODE_VERSION;
45    let width = unicode_width::UNICODE_VERSION;
46
47    if rustc_lexer.0 != rustc_span.0
48        || rustc_lexer.1 != rustc_span.1
49        || rustc_lexer.2 != rustc_span.2
50    {
51        {
    ::core::panicking::panic_fmt(format_args!("rustc_lexer and rustc_span must use the same Unicode version, `rustc_lexer::UNICODE_VERSION` and `rustc_span::UNICODE_VERSION` are different."));
};panic!(
52            "rustc_lexer and rustc_span must use the same Unicode version, \
53            `rustc_lexer::UNICODE_VERSION` and `rustc_span::UNICODE_VERSION` are \
54            different."
55        );
56    }
57
58    if rustc_lexer.0 != normalization.0
59        || rustc_lexer.1 != normalization.1
60        || rustc_lexer.2 != normalization.2
61    {
62        {
    ::core::panicking::panic_fmt(format_args!("rustc_lexer and unicode-normalization must use the same Unicode version, `rustc_lexer::UNICODE_VERSION` and `unicode_normalization::UNICODE_VERSION` are different."));
};panic!(
63            "rustc_lexer and unicode-normalization must use the same Unicode version, \
64            `rustc_lexer::UNICODE_VERSION` and `unicode_normalization::UNICODE_VERSION` are \
65            different."
66        );
67    }
68
69    if rustc_lexer.0 != width.0 || rustc_lexer.1 != width.1 || rustc_lexer.2 != width.2 {
70        {
    ::core::panicking::panic_fmt(format_args!("rustc_lexer and unicode-width must use the same Unicode version, `rustc_lexer::UNICODE_VERSION` and `unicode_width::UNICODE_VERSION` are different."));
};panic!(
71            "rustc_lexer and unicode-width must use the same Unicode version, \
72            `rustc_lexer::UNICODE_VERSION` and `unicode_width::UNICODE_VERSION` are \
73            different."
74        );
75    }
76};
77
78// Unwrap the result if `Ok`, otherwise emit the diagnostics and abort.
79pub fn unwrap_or_emit_fatal<T>(expr: Result<T, Vec<Diag<'_>>>) -> T {
80    match expr {
81        Ok(expr) => expr,
82        Err(errs) => {
83            for err in errs {
84                err.emit();
85            }
86            FatalError.raise()
87        }
88    }
89}
90
91/// Creates a new parser from a source string.
92///
93/// On failure, the errors must be consumed via `unwrap_or_emit_fatal`, `emit`, `cancel`,
94/// etc., otherwise a panic will occur when they are dropped.
95pub fn new_parser_from_source_str(
96    psess: &ParseSess,
97    name: FileName,
98    source: String,
99    strip_tokens: StripTokens,
100) -> Result<Parser<'_>, Vec<Diag<'_>>> {
101    let source_file = psess.source_map().new_source_file(name, source);
102    new_parser_from_source_file(psess, source_file, strip_tokens)
103}
104
105/// Creates a new parser from a filename. On failure, the errors must be consumed via
106/// `unwrap_or_emit_fatal`, `emit`, `cancel`, etc., otherwise a panic will occur when they are
107/// dropped.
108///
109/// If a span is given, that is used on an error as the source of the problem.
110pub fn new_parser_from_file<'a>(
111    psess: &'a ParseSess,
112    path: &Path,
113    strip_tokens: StripTokens,
114    sp: Option<Span>,
115) -> Result<Parser<'a>, Vec<Diag<'a>>> {
116    let sm = psess.source_map();
117    let source_file = sm.load_file(path).unwrap_or_else(|e| {
118        let msg = ::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("couldn\'t read `{0}`: {1}",
                path.display(), e))
    })format!("couldn't read `{}`: {}", path.display(), e);
119        let mut err = psess.dcx().struct_fatal(msg);
120        if let Ok(contents) = std::fs::read(path)
121            && let Err(utf8err) = String::from_utf8(contents.clone())
122        {
123            utf8_error(
124                sm,
125                &path.display().to_string(),
126                sp,
127                &mut err,
128                utf8err.utf8_error(),
129                &contents,
130            );
131        }
132        if let Some(sp) = sp {
133            err.span(sp);
134        }
135        err.emit();
136    });
137    new_parser_from_source_file(psess, source_file, strip_tokens)
138}
139
140pub fn utf8_error<E: EmissionGuarantee>(
141    sm: &SourceMap,
142    path: &str,
143    sp: Option<Span>,
144    err: &mut Diag<'_, E>,
145    utf8err: Utf8Error,
146    contents: &[u8],
147) {
148    // The file exists, but it wasn't valid UTF-8.
149    let start = utf8err.valid_up_to();
150    let note = ::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("invalid utf-8 at byte `{0}`",
                start))
    })format!("invalid utf-8 at byte `{start}`");
151    let msg = if let Some(len) = utf8err.error_len() {
152        ::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("byte{1} `{0}` {2} not valid utf-8",
                if len == 1 {
                    ::alloc::__export::must_use({
                            ::alloc::fmt::format(format_args!("{0:?}", contents[start]))
                        })
                } else {
                    ::alloc::__export::must_use({
                            ::alloc::fmt::format(format_args!("{0:?}",
                                    &contents[start..start + len]))
                        })
                }, if len == 1 { "" } else { "s" },
                if len == 1 { "is" } else { "are" }))
    })format!(
153            "byte{s} `{bytes}` {are} not valid utf-8",
154            bytes = if len == 1 {
155                format!("{:?}", contents[start])
156            } else {
157                format!("{:?}", &contents[start..start + len])
158            },
159            s = pluralize!(len),
160            are = if len == 1 { "is" } else { "are" },
161        )
162    } else {
163        note.clone()
164    };
165    let contents = String::from_utf8_lossy(contents).to_string();
166
167    // We only emit this error for files in the current session
168    // so the working directory can only be the current working directory
169    let filename = FileName::Real(
170        sm.path_mapping().to_real_filename(sm.working_dir(), PathBuf::from(path).as_path()),
171    );
172    let source = sm.new_source_file(filename, contents);
173
174    // Avoid out-of-bounds span from lossy UTF-8 conversion.
175    if start as u32 > source.normalized_source_len.0 {
176        err.note(note);
177        return;
178    }
179
180    let span = Span::with_root_ctxt(
181        source.normalized_byte_pos(start as u32),
182        source.normalized_byte_pos(start as u32),
183    );
184    if span.is_dummy() {
185        err.note(note);
186    } else {
187        if sp.is_some() {
188            err.span_note(span, msg);
189        } else {
190            err.span(span);
191            err.span_label(span, msg);
192        }
193    }
194}
195
196/// Given a session and a `source_file`, return a parser. Returns any buffered errors from lexing
197/// the initial token stream.
198fn new_parser_from_source_file(
199    psess: &ParseSess,
200    source_file: Arc<SourceFile>,
201    strip_tokens: StripTokens,
202) -> Result<Parser<'_>, Vec<Diag<'_>>> {
203    let end_pos = source_file.end_position();
204    let stream = source_file_to_stream(psess, source_file, None, strip_tokens)?;
205    let mut parser = Parser::new(psess, stream, None);
206    if parser.token == token::Eof {
207        parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt(), None);
208    }
209    Ok(parser)
210}
211
212/// Given a source string, produces a sequence of token trees.
213///
214/// NOTE: This only strips shebangs, not frontmatter!
215pub fn source_str_to_stream(
216    psess: &ParseSess,
217    name: FileName,
218    source: String,
219    override_span: Option<Span>,
220) -> Result<TokenStream, Vec<Diag<'_>>> {
221    let source_file = psess.source_map().new_source_file(name, source);
222    // FIXME(frontmatter): Consider stripping frontmatter in a future edition. We can't strip them
223    // in the current edition since that would be breaking.
224    // See also <https://github.com/rust-lang/rust/issues/145520>.
225    // Alternatively, stop stripping shebangs here, too, if T-lang and crater approve.
226    source_file_to_stream(psess, source_file, override_span, StripTokens::Shebang)
227}
228
229/// Given a source file, produces a sequence of token trees.
230///
231/// Returns any buffered errors from parsing the token stream.
232fn source_file_to_stream<'psess>(
233    psess: &'psess ParseSess,
234    source_file: Arc<SourceFile>,
235    override_span: Option<Span>,
236    strip_tokens: StripTokens,
237) -> Result<TokenStream, Vec<Diag<'psess>>> {
238    let src = source_file.src.as_ref().unwrap_or_else(|| {
239        psess.dcx().bug(::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("cannot lex `source_file` without source: {0}",
                psess.source_map().filename_for_diagnostics(&source_file.name)))
    })format!(
240            "cannot lex `source_file` without source: {}",
241            psess.source_map().filename_for_diagnostics(&source_file.name)
242        ));
243    });
244
245    lexer::lex_token_trees(psess, src.as_str(), source_file.start_pos, override_span, strip_tokens)
246}
247
248/// Runs the given subparser `f` on the tokens of the given `attr`'s item.
249pub fn parse_in<'a, T>(
250    psess: &'a ParseSess,
251    tts: TokenStream,
252    name: &'static str,
253    mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
254) -> PResult<'a, T> {
255    let mut parser = Parser::new(psess, tts, Some(name));
256    let result = f(&mut parser)?;
257    if parser.token != token::Eof {
258        parser.unexpected()?;
259    }
260    Ok(result)
261}
262
263pub fn fake_token_stream_for_item(psess: &ParseSess, item: &ast::Item) -> TokenStream {
264    let source = pprust::item_to_string(item);
265    let filename = FileName::macro_expansion_source_code(&source);
266    unwrap_or_emit_fatal(source_str_to_stream(psess, filename, source, Some(item.span)))
267}
268
269pub fn fake_token_stream_for_crate(psess: &ParseSess, krate: &ast::Crate) -> TokenStream {
270    let source = pprust::crate_to_string_for_macros(krate);
271    let filename = FileName::macro_expansion_source_code(&source);
272    unwrap_or_emit_fatal(source_str_to_stream(
273        psess,
274        filename,
275        source,
276        Some(krate.spans.inner_span),
277    ))
278}