// rustc_parse — crate root (lib.rs)
1//! The main parser interface.
2
3// tidy-alphabetical-start
4#![cfg_attr(test, feature(iter_order_by))]
5#![feature(box_patterns)]
6#![feature(debug_closure_helpers)]
7#![feature(default_field_values)]
8#![feature(iter_intersperse)]
9#![recursion_limit = "256"]
10// tidy-alphabetical-end
11
12use std::path::{Path, PathBuf};
13use std::str::Utf8Error;
14use std::sync::Arc;
15
16use rustc_ast as ast;
17use rustc_ast::token;
18use rustc_ast::tokenstream::TokenStream;
19use rustc_ast_pretty::pprust;
20use rustc_errors::{Diag, EmissionGuarantee, FatalError, PResult, pluralize};
21pub use rustc_lexer::UNICODE_VERSION;
22use rustc_session::parse::ParseSess;
23use rustc_span::source_map::SourceMap;
24use rustc_span::{FileName, SourceFile, Span};
25
26pub const MACRO_ARGUMENTS: Option<&str> = Some("macro arguments");
27
28#[macro_use]
29pub mod parser;
30use parser::Parser;
31
32use crate::lexer::StripTokens;
33
34pub mod lexer;
35
36mod errors;
37
38// Make sure that the Unicode version of the dependencies is the same.
39const _: () = {
40    let rustc_lexer = rustc_lexer::UNICODE_VERSION;
41    let rustc_span = rustc_span::UNICODE_VERSION;
42    let normalization = unicode_normalization::UNICODE_VERSION;
43    let width = unicode_width::UNICODE_VERSION;
44
45    if rustc_lexer.0 != rustc_span.0
46        || rustc_lexer.1 != rustc_span.1
47        || rustc_lexer.2 != rustc_span.2
48    {
49        {
    ::core::panicking::panic_fmt(format_args!("rustc_lexer and rustc_span must use the same Unicode version, `rustc_lexer::UNICODE_VERSION` and `rustc_span::UNICODE_VERSION` are different."));
};panic!(
50            "rustc_lexer and rustc_span must use the same Unicode version, \
51            `rustc_lexer::UNICODE_VERSION` and `rustc_span::UNICODE_VERSION` are \
52            different."
53        );
54    }
55
56    if rustc_lexer.0 != normalization.0
57        || rustc_lexer.1 != normalization.1
58        || rustc_lexer.2 != normalization.2
59    {
60        {
    ::core::panicking::panic_fmt(format_args!("rustc_lexer and unicode-normalization must use the same Unicode version, `rustc_lexer::UNICODE_VERSION` and `unicode_normalization::UNICODE_VERSION` are different."));
};panic!(
61            "rustc_lexer and unicode-normalization must use the same Unicode version, \
62            `rustc_lexer::UNICODE_VERSION` and `unicode_normalization::UNICODE_VERSION` are \
63            different."
64        );
65    }
66
67    if rustc_lexer.0 != width.0 || rustc_lexer.1 != width.1 || rustc_lexer.2 != width.2 {
68        {
    ::core::panicking::panic_fmt(format_args!("rustc_lexer and unicode-width must use the same Unicode version, `rustc_lexer::UNICODE_VERSION` and `unicode_width::UNICODE_VERSION` are different."));
};panic!(
69            "rustc_lexer and unicode-width must use the same Unicode version, \
70            `rustc_lexer::UNICODE_VERSION` and `unicode_width::UNICODE_VERSION` are \
71            different."
72        );
73    }
74};
75
76// Unwrap the result if `Ok`, otherwise emit the diagnostics and abort.
77pub fn unwrap_or_emit_fatal<T>(expr: Result<T, Vec<Diag<'_>>>) -> T {
78    match expr {
79        Ok(expr) => expr,
80        Err(errs) => {
81            for err in errs {
82                err.emit();
83            }
84            FatalError.raise()
85        }
86    }
87}
88
89/// Creates a new parser from a source string.
90///
91/// On failure, the errors must be consumed via `unwrap_or_emit_fatal`, `emit`, `cancel`,
92/// etc., otherwise a panic will occur when they are dropped.
93pub fn new_parser_from_source_str(
94    psess: &ParseSess,
95    name: FileName,
96    source: String,
97    strip_tokens: StripTokens,
98) -> Result<Parser<'_>, Vec<Diag<'_>>> {
99    let source_file = psess.source_map().new_source_file(name, source);
100    new_parser_from_source_file(psess, source_file, strip_tokens)
101}
102
103/// Creates a new parser from a filename. On failure, the errors must be consumed via
104/// `unwrap_or_emit_fatal`, `emit`, `cancel`, etc., otherwise a panic will occur when they are
105/// dropped.
106///
107/// If a span is given, that is used on an error as the source of the problem.
108pub fn new_parser_from_file<'a>(
109    psess: &'a ParseSess,
110    path: &Path,
111    strip_tokens: StripTokens,
112    sp: Option<Span>,
113) -> Result<Parser<'a>, Vec<Diag<'a>>> {
114    let sm = psess.source_map();
115    let source_file = sm.load_file(path).unwrap_or_else(|e| {
116        let msg = ::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("couldn\'t read `{0}`: {1}",
                path.display(), e))
    })format!("couldn't read `{}`: {}", path.display(), e);
117        let mut err = psess.dcx().struct_fatal(msg);
118        if let Ok(contents) = std::fs::read(path)
119            && let Err(utf8err) = std::str::from_utf8(&contents)
120        {
121            utf8_error(sm, &path.display().to_string(), sp, &mut err, utf8err, &contents);
122        }
123        if let Some(sp) = sp {
124            err.span(sp);
125        }
126        err.emit()
127    });
128    new_parser_from_source_file(psess, source_file, strip_tokens)
129}
130
131pub fn utf8_error<E: EmissionGuarantee>(
132    sm: &SourceMap,
133    path: &str,
134    sp: Option<Span>,
135    err: &mut Diag<'_, E>,
136    utf8err: Utf8Error,
137    contents: &[u8],
138) {
139    // The file exists, but it wasn't valid UTF-8.
140    let start = utf8err.valid_up_to();
141    let note = ::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("invalid utf-8 at byte `{0}`",
                start))
    })format!("invalid utf-8 at byte `{start}`");
142    let msg = if let Some(len) = utf8err.error_len() {
143        ::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("byte{1} `{0}` {2} not valid utf-8",
                if len == 1 {
                    ::alloc::__export::must_use({
                            ::alloc::fmt::format(format_args!("{0:?}", contents[start]))
                        })
                } else {
                    ::alloc::__export::must_use({
                            ::alloc::fmt::format(format_args!("{0:?}",
                                    &contents[start..start + len]))
                        })
                }, if len == 1 { "" } else { "s" },
                if len == 1 { "is" } else { "are" }))
    })format!(
144            "byte{s} `{bytes}` {are} not valid utf-8",
145            bytes = if len == 1 {
146                format!("{:?}", contents[start])
147            } else {
148                format!("{:?}", &contents[start..start + len])
149            },
150            s = pluralize!(len),
151            are = if len == 1 { "is" } else { "are" },
152        )
153    } else {
154        note.clone()
155    };
156    let contents = String::from_utf8_lossy(contents).to_string();
157
158    // We only emit this error for files in the current session
159    // so the working directory can only be the current working directory
160    let filename = FileName::Real(
161        sm.path_mapping().to_real_filename(sm.working_dir(), PathBuf::from(path).as_path()),
162    );
163    let source = sm.new_source_file(filename, contents);
164
165    // Avoid out-of-bounds span from lossy UTF-8 conversion.
166    if start as u32 > source.normalized_source_len.0 {
167        err.note(note);
168        return;
169    }
170
171    let span = Span::with_root_ctxt(
172        source.normalized_byte_pos(start as u32),
173        source.normalized_byte_pos(start as u32),
174    );
175    if span.is_dummy() {
176        err.note(note);
177    } else {
178        if sp.is_some() {
179            err.span_note(span, msg);
180        } else {
181            err.span(span);
182            err.span_label(span, msg);
183        }
184    }
185}
186
187/// Given a session and a `source_file`, return a parser. Returns any buffered errors from lexing
188/// the initial token stream.
189fn new_parser_from_source_file(
190    psess: &ParseSess,
191    source_file: Arc<SourceFile>,
192    strip_tokens: StripTokens,
193) -> Result<Parser<'_>, Vec<Diag<'_>>> {
194    let end_pos = source_file.end_position();
195    let stream = source_file_to_stream(psess, source_file, None, strip_tokens)?;
196    let mut parser = Parser::new(psess, stream, None);
197    if parser.token == token::Eof {
198        parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt(), None);
199    }
200    Ok(parser)
201}
202
203/// Given a source string, produces a sequence of token trees.
204///
205/// NOTE: This only strips shebangs, not frontmatter!
206pub fn source_str_to_stream(
207    psess: &ParseSess,
208    name: FileName,
209    source: String,
210    override_span: Option<Span>,
211) -> Result<TokenStream, Vec<Diag<'_>>> {
212    let source_file = psess.source_map().new_source_file(name, source);
213    // FIXME(frontmatter): Consider stripping frontmatter in a future edition. We can't strip them
214    // in the current edition since that would be breaking.
215    // See also <https://github.com/rust-lang/rust/issues/145520>.
216    // Alternatively, stop stripping shebangs here, too, if T-lang and crater approve.
217    source_file_to_stream(psess, source_file, override_span, StripTokens::Shebang)
218}
219
220/// Given a source file, produces a sequence of token trees.
221///
222/// Returns any buffered errors from parsing the token stream.
223fn source_file_to_stream<'psess>(
224    psess: &'psess ParseSess,
225    source_file: Arc<SourceFile>,
226    override_span: Option<Span>,
227    strip_tokens: StripTokens,
228) -> Result<TokenStream, Vec<Diag<'psess>>> {
229    let src = source_file.src.as_ref().unwrap_or_else(|| {
230        psess.dcx().bug(::alloc::__export::must_use({
        ::alloc::fmt::format(format_args!("cannot lex `source_file` without source: {0}",
                psess.source_map().filename_for_diagnostics(&source_file.name)))
    })format!(
231            "cannot lex `source_file` without source: {}",
232            psess.source_map().filename_for_diagnostics(&source_file.name)
233        ));
234    });
235
236    lexer::lex_token_trees(psess, src.as_str(), source_file.start_pos, override_span, strip_tokens)
237}
238
239/// Runs the given subparser `f` on the tokens of the given `attr`'s item.
240pub fn parse_in<'a, T>(
241    psess: &'a ParseSess,
242    tts: TokenStream,
243    name: &'static str,
244    mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
245) -> PResult<'a, T> {
246    let mut parser = Parser::new(psess, tts, Some(name));
247    let result = f(&mut parser)?;
248    if parser.token != token::Eof {
249        parser.unexpected()?;
250    }
251    Ok(result)
252}
253
254pub fn fake_token_stream_for_item(psess: &ParseSess, item: &ast::Item) -> TokenStream {
255    let source = pprust::item_to_string(item);
256    let filename = FileName::macro_expansion_source_code(&source);
257    unwrap_or_emit_fatal(source_str_to_stream(psess, filename, source, Some(item.span)))
258}
259
260pub fn fake_token_stream_for_crate(psess: &ParseSess, krate: &ast::Crate) -> TokenStream {
261    let source = pprust::crate_to_string_for_macros(krate);
262    let filename = FileName::macro_expansion_source_code(&source);
263    unwrap_or_emit_fatal(source_str_to_stream(
264        psess,
265        filename,
266        source,
267        Some(krate.spans.inner_span),
268    ))
269}