// compiler/rustc_parse/src/lib.rs
1//! The main parser interface.
2
3// tidy-alphabetical-start
4#![cfg_attr(bootstrap, feature(assert_matches))]
5#![cfg_attr(bootstrap, feature(if_let_guard))]
6#![cfg_attr(test, feature(iter_order_by))]
7#![feature(box_patterns)]
8#![feature(debug_closure_helpers)]
9#![feature(default_field_values)]
10#![feature(iter_intersperse)]
11#![recursion_limit = "256"]
12// tidy-alphabetical-end
13
use std::path::{Path, PathBuf};
use std::str::Utf8Error;
use std::sync::Arc;

use rustc_ast as ast;
use rustc_ast::token;
use rustc_ast::tokenstream::TokenStream;
use rustc_ast_pretty::pprust;
use rustc_errors::{Diag, EmissionGuarantee, FatalError, PResult, pluralize};
pub use rustc_lexer::UNICODE_VERSION;
use rustc_session::parse::ParseSess;
use rustc_span::source_map::SourceMap;
use rustc_span::{FileName, SourceFile, Span};

/// Subparser-name label passed to `Parser::new` when parsing macro arguments,
/// used in diagnostics to describe what is being parsed.
pub const MACRO_ARGUMENTS: Option<&str> = Some("macro arguments");

30#[macro_use]
31pub mod parser;
32use parser::Parser;
33
34use crate::lexer::StripTokens;
35
36pub mod lexer;
37
38mod errors;
39
40// Make sure that the Unicode version of the dependencies is the same.
41const _: () = {
42    let rustc_lexer = rustc_lexer::UNICODE_VERSION;
43    let rustc_span = rustc_span::UNICODE_VERSION;
44    let normalization = unicode_normalization::UNICODE_VERSION;
45    let width = unicode_width::UNICODE_VERSION;
46
47    if rustc_lexer.0 != rustc_span.0
48        || rustc_lexer.1 != rustc_span.1
49        || rustc_lexer.2 != rustc_span.2
50    {
51        {
    ::core::panicking::panic_fmt(format_args!("rustc_lexer and rustc_span must use the same Unicode version, `rustc_lexer::UNICODE_VERSION` and `rustc_span::UNICODE_VERSION` are different."));
};panic!(
52            "rustc_lexer and rustc_span must use the same Unicode version, \
53            `rustc_lexer::UNICODE_VERSION` and `rustc_span::UNICODE_VERSION` are \
54            different."
55        );
56    }
57
58    if rustc_lexer.0 != normalization.0
59        || rustc_lexer.1 != normalization.1
60        || rustc_lexer.2 != normalization.2
61    {
62        {
    ::core::panicking::panic_fmt(format_args!("rustc_lexer and unicode-normalization must use the same Unicode version, `rustc_lexer::UNICODE_VERSION` and `unicode_normalization::UNICODE_VERSION` are different."));
};panic!(
63            "rustc_lexer and unicode-normalization must use the same Unicode version, \
64            `rustc_lexer::UNICODE_VERSION` and `unicode_normalization::UNICODE_VERSION` are \
65            different."
66        );
67    }
68
69    if rustc_lexer.0 != width.0 || rustc_lexer.1 != width.1 || rustc_lexer.2 != width.2 {
70        {
    ::core::panicking::panic_fmt(format_args!("rustc_lexer and unicode-width must use the same Unicode version, `rustc_lexer::UNICODE_VERSION` and `unicode_width::UNICODE_VERSION` are different."));
};panic!(
71            "rustc_lexer and unicode-width must use the same Unicode version, \
72            `rustc_lexer::UNICODE_VERSION` and `unicode_width::UNICODE_VERSION` are \
73            different."
74        );
75    }
76};
77
78// Unwrap the result if `Ok`, otherwise emit the diagnostics and abort.
79pub fn unwrap_or_emit_fatal<T>(expr: Result<T, Vec<Diag<'_>>>) -> T {
80    match expr {
81        Ok(expr) => expr,
82        Err(errs) => {
83            for err in errs {
84                err.emit();
85            }
86            FatalError.raise()
87        }
88    }
89}
90
91/// Creates a new parser from a source string.
92///
93/// On failure, the errors must be consumed via `unwrap_or_emit_fatal`, `emit`, `cancel`,
94/// etc., otherwise a panic will occur when they are dropped.
95pub fn new_parser_from_source_str(
96    psess: &ParseSess,
97    name: FileName,
98    source: String,
99    strip_tokens: StripTokens,
100) -> Result<Parser<'_>, Vec<Diag<'_>>> {
101    let source_file = psess.source_map().new_source_file(name, source);
102    new_parser_from_source_file(psess, source_file, strip_tokens)
103}
104
105/// Creates a new parser from a filename. On failure, the errors must be consumed via
106/// `unwrap_or_emit_fatal`, `emit`, `cancel`, etc., otherwise a panic will occur when they are
107/// dropped.
108///
109/// If a span is given, that is used on an error as the source of the problem.
110pub fn new_parser_from_file<'a>(
111    psess: &'a ParseSess,
112    path: &Path,
113    strip_tokens: StripTokens,
114    sp: Option<Span>,
115) -> Result<Parser<'a>, Vec<Diag<'a>>> {
116    let sm = psess.source_map();
117    let source_file = sm.load_file(path).unwrap_or_else(|e| {
118        let msg = ::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("couldn\'t read `{0}`: {1}",
                path.display(), e))
    })format!("couldn't read `{}`: {}", path.display(), e);
119        let mut err = psess.dcx().struct_fatal(msg);
120        if let Ok(contents) = std::fs::read(path)
121            && let Err(utf8err) = std::str::from_utf8(&contents)
122        {
123            utf8_error(sm, &path.display().to_string(), sp, &mut err, utf8err, &contents);
124        }
125        if let Some(sp) = sp {
126            err.span(sp);
127        }
128        err.emit()
129    });
130    new_parser_from_source_file(psess, source_file, strip_tokens)
131}
132
133pub fn utf8_error<E: EmissionGuarantee>(
134    sm: &SourceMap,
135    path: &str,
136    sp: Option<Span>,
137    err: &mut Diag<'_, E>,
138    utf8err: Utf8Error,
139    contents: &[u8],
140) {
141    // The file exists, but it wasn't valid UTF-8.
142    let start = utf8err.valid_up_to();
143    let note = ::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("invalid utf-8 at byte `{0}`",
                start))
    })format!("invalid utf-8 at byte `{start}`");
144    let msg = if let Some(len) = utf8err.error_len() {
145        ::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("byte{1} `{0}` {2} not valid utf-8",
                if len == 1 {
                    ::alloc::__export::must_use({
                            ::alloc::fmt::format(format_args!("{0:?}", contents[start]))
                        })
                } else {
                    ::alloc::__export::must_use({
                            ::alloc::fmt::format(format_args!("{0:?}",
                                    &contents[start..start + len]))
                        })
                }, if len == 1 { "" } else { "s" },
                if len == 1 { "is" } else { "are" }))
    })format!(
146            "byte{s} `{bytes}` {are} not valid utf-8",
147            bytes = if len == 1 {
148                format!("{:?}", contents[start])
149            } else {
150                format!("{:?}", &contents[start..start + len])
151            },
152            s = pluralize!(len),
153            are = if len == 1 { "is" } else { "are" },
154        )
155    } else {
156        note.clone()
157    };
158    let contents = String::from_utf8_lossy(contents).to_string();
159
160    // We only emit this error for files in the current session
161    // so the working directory can only be the current working directory
162    let filename = FileName::Real(
163        sm.path_mapping().to_real_filename(sm.working_dir(), PathBuf::from(path).as_path()),
164    );
165    let source = sm.new_source_file(filename, contents);
166
167    // Avoid out-of-bounds span from lossy UTF-8 conversion.
168    if start as u32 > source.normalized_source_len.0 {
169        err.note(note);
170        return;
171    }
172
173    let span = Span::with_root_ctxt(
174        source.normalized_byte_pos(start as u32),
175        source.normalized_byte_pos(start as u32),
176    );
177    if span.is_dummy() {
178        err.note(note);
179    } else {
180        if sp.is_some() {
181            err.span_note(span, msg);
182        } else {
183            err.span(span);
184            err.span_label(span, msg);
185        }
186    }
187}
188
189/// Given a session and a `source_file`, return a parser. Returns any buffered errors from lexing
190/// the initial token stream.
191fn new_parser_from_source_file(
192    psess: &ParseSess,
193    source_file: Arc<SourceFile>,
194    strip_tokens: StripTokens,
195) -> Result<Parser<'_>, Vec<Diag<'_>>> {
196    let end_pos = source_file.end_position();
197    let stream = source_file_to_stream(psess, source_file, None, strip_tokens)?;
198    let mut parser = Parser::new(psess, stream, None);
199    if parser.token == token::Eof {
200        parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt(), None);
201    }
202    Ok(parser)
203}
204
205/// Given a source string, produces a sequence of token trees.
206///
207/// NOTE: This only strips shebangs, not frontmatter!
208pub fn source_str_to_stream(
209    psess: &ParseSess,
210    name: FileName,
211    source: String,
212    override_span: Option<Span>,
213) -> Result<TokenStream, Vec<Diag<'_>>> {
214    let source_file = psess.source_map().new_source_file(name, source);
215    // FIXME(frontmatter): Consider stripping frontmatter in a future edition. We can't strip them
216    // in the current edition since that would be breaking.
217    // See also <https://github.com/rust-lang/rust/issues/145520>.
218    // Alternatively, stop stripping shebangs here, too, if T-lang and crater approve.
219    source_file_to_stream(psess, source_file, override_span, StripTokens::Shebang)
220}
221
222/// Given a source file, produces a sequence of token trees.
223///
224/// Returns any buffered errors from parsing the token stream.
225fn source_file_to_stream<'psess>(
226    psess: &'psess ParseSess,
227    source_file: Arc<SourceFile>,
228    override_span: Option<Span>,
229    strip_tokens: StripTokens,
230) -> Result<TokenStream, Vec<Diag<'psess>>> {
231    let src = source_file.src.as_ref().unwrap_or_else(|| {
232        psess.dcx().bug(::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("cannot lex `source_file` without source: {0}",
                psess.source_map().filename_for_diagnostics(&source_file.name)))
    })format!(
233            "cannot lex `source_file` without source: {}",
234            psess.source_map().filename_for_diagnostics(&source_file.name)
235        ));
236    });
237
238    lexer::lex_token_trees(psess, src.as_str(), source_file.start_pos, override_span, strip_tokens)
239}
240
241/// Runs the given subparser `f` on the tokens of the given `attr`'s item.
242pub fn parse_in<'a, T>(
243    psess: &'a ParseSess,
244    tts: TokenStream,
245    name: &'static str,
246    mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
247) -> PResult<'a, T> {
248    let mut parser = Parser::new(psess, tts, Some(name));
249    let result = f(&mut parser)?;
250    if parser.token != token::Eof {
251        parser.unexpected()?;
252    }
253    Ok(result)
254}
255
256pub fn fake_token_stream_for_item(psess: &ParseSess, item: &ast::Item) -> TokenStream {
257    let source = pprust::item_to_string(item);
258    let filename = FileName::macro_expansion_source_code(&source);
259    unwrap_or_emit_fatal(source_str_to_stream(psess, filename, source, Some(item.span)))
260}
261
262pub fn fake_token_stream_for_crate(psess: &ParseSess, krate: &ast::Crate) -> TokenStream {
263    let source = pprust::crate_to_string_for_macros(krate);
264    let filename = FileName::macro_expansion_source_code(&source);
265    unwrap_or_emit_fatal(source_str_to_stream(
266        psess,
267        filename,
268        source,
269        Some(krate.spans.inner_span),
270    ))
271}