//! The main parser interface.

// tidy-alphabetical-start
#![allow(rustc::diagnostic_outside_of_impl)]
#![allow(rustc::untranslatable_diagnostic)]
#![feature(assert_matches)]
#![feature(box_patterns)]
#![feature(debug_closure_helpers)]
#![feature(default_field_values)]
#![feature(if_let_guard)]
#![feature(iter_intersperse)]
#![feature(iter_order_by)]
#![recursion_limit = "256"]
// tidy-alphabetical-end

use std::path::{Path, PathBuf};
use std::str::Utf8Error;
use std::sync::Arc;

use rustc_ast as ast;
use rustc_ast::token;
use rustc_ast::tokenstream::TokenStream;
use rustc_ast_pretty::pprust;
use rustc_errors::{Diag, EmissionGuarantee, FatalError, PResult, pluralize};
use rustc_session::parse::ParseSess;
use rustc_span::source_map::SourceMap;
use rustc_span::{FileName, SourceFile, Span};
pub use unicode_normalization::UNICODE_VERSION as UNICODE_NORMALIZATION_VERSION;

pub const MACRO_ARGUMENTS: Option<&str> = Some("macro arguments");

#[macro_use]
pub mod parser;
use parser::Parser;

use crate::lexer::StripTokens;

pub mod lexer;

mod errors;

rustc_fluent_macro::fluent_messages! { "../messages.ftl" }

/// Unwraps the result if `Ok`; otherwise emits the diagnostics and aborts.
pub fn unwrap_or_emit_fatal<T>(expr: Result<T, Vec<Diag<'_>>>) -> T {
    match expr {
        Ok(expr) => expr,
        Err(errs) => {
            for err in errs {
                err.emit();
            }
            FatalError.raise()
        }
    }
}

/// Creates a new parser from a source string.
///
/// On failure, the errors must be consumed via `unwrap_or_emit_fatal`, `emit`, `cancel`,
/// etc., otherwise a panic will occur when they are dropped.
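///
/// # Example
///
/// A minimal sketch, not a compilable doc test (this crate is internal to the compiler);
/// `psess` is assumed to be an already-constructed `ParseSess`:
///
/// ```ignore (illustrative)
/// let mut parser = unwrap_or_emit_fatal(new_parser_from_source_str(
///     psess,
///     FileName::Custom("example".to_owned()),
///     "fn main() {}".to_owned(),
///     StripTokens::Shebang,
/// ));
/// let krate = parser.parse_crate_mod(); // yields a `PResult<'_, ast::Crate>`
/// ```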
pub fn new_parser_from_source_str(
    psess: &ParseSess,
    name: FileName,
    source: String,
    strip_tokens: StripTokens,
) -> Result<Parser<'_>, Vec<Diag<'_>>> {
    let source_file = psess.source_map().new_source_file(name, source);
    new_parser_from_source_file(psess, source_file, strip_tokens)
}

/// Creates a new parser from a filename. On failure, the errors must be consumed via
/// `unwrap_or_emit_fatal`, `emit`, `cancel`, etc., otherwise a panic will occur when they are
/// dropped.
///
/// If a span is given, it is attached to any resulting error as the source of the problem.
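///
/// # Example
///
/// A minimal sketch, not a compilable doc test; `psess` is assumed to be an existing
/// `ParseSess` and the path is purely illustrative:
///
/// ```ignore (illustrative)
/// let mut parser = unwrap_or_emit_fatal(new_parser_from_file(
///     psess,
///     std::path::Path::new("src/lib.rs"),
///     StripTokens::Shebang,
///     None,
/// ));
/// ```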
pub fn new_parser_from_file<'a>(
    psess: &'a ParseSess,
    path: &Path,
    strip_tokens: StripTokens,
    sp: Option<Span>,
) -> Result<Parser<'a>, Vec<Diag<'a>>> {
    let sm = psess.source_map();
    let source_file = sm.load_file(path).unwrap_or_else(|e| {
        let msg = format!("couldn't read `{}`: {}", path.display(), e);
        let mut err = psess.dcx().struct_fatal(msg);
        if let Ok(contents) = std::fs::read(path)
            && let Err(utf8err) = String::from_utf8(contents.clone())
        {
            utf8_error(
                sm,
                &path.display().to_string(),
                sp,
                &mut err,
                utf8err.utf8_error(),
                &contents,
            );
        }
        if let Some(sp) = sp {
            err.span(sp);
        }
        err.emit();
    });
    new_parser_from_source_file(psess, source_file, strip_tokens)
}

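/// Annotates `err` with the location of the first invalid UTF-8 sequence in `contents`,
/// pointing at the offending byte(s) where a span can be constructed and falling back to a
/// plain note otherwise.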
pub fn utf8_error<E: EmissionGuarantee>(
    sm: &SourceMap,
    path: &str,
    sp: Option<Span>,
    err: &mut Diag<'_, E>,
    utf8err: Utf8Error,
    contents: &[u8],
) {
    // The file exists, but it wasn't valid UTF-8.
    let start = utf8err.valid_up_to();
    let note = format!("invalid utf-8 at byte `{start}`");
    let msg = if let Some(len) = utf8err.error_len() {
        format!(
            "byte{s} `{bytes}` {are} not valid utf-8",
            bytes = if len == 1 {
                format!("{:?}", contents[start])
            } else {
                format!("{:?}", &contents[start..start + len])
            },
            s = pluralize!(len),
            are = if len == 1 { "is" } else { "are" },
        )
    } else {
        note.clone()
    };
    let contents = String::from_utf8_lossy(contents).to_string();

    // We only emit this error for files in the current session,
    // so the working directory can only be the current working directory.
    let filename = FileName::Real(
        sm.path_mapping().to_real_filename(sm.working_dir(), PathBuf::from(path).as_path()),
    );
    let source = sm.new_source_file(filename, contents);

    // Avoid out-of-bounds span from lossy UTF-8 conversion.
    if start as u32 > source.normalized_source_len.0 {
        err.note(note);
        return;
    }

    let span = Span::with_root_ctxt(
        source.normalized_byte_pos(start as u32),
        source.normalized_byte_pos(start as u32),
    );
    if span.is_dummy() {
        err.note(note);
    } else {
        if sp.is_some() {
            err.span_note(span, msg);
        } else {
            err.span(span);
            err.span_label(span, msg);
        }
    }
}

/// Given a session and a `source_file`, returns a parser, along with any buffered errors from
/// lexing the initial token stream.
fn new_parser_from_source_file(
    psess: &ParseSess,
    source_file: Arc<SourceFile>,
    strip_tokens: StripTokens,
) -> Result<Parser<'_>, Vec<Diag<'_>>> {
    let end_pos = source_file.end_position();
    let stream = source_file_to_stream(psess, source_file, None, strip_tokens)?;
    let mut parser = Parser::new(psess, stream, None);
    if parser.token == token::Eof {
        parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt(), None);
    }
    Ok(parser)
}

/// Given a source string, produces a sequence of token trees.
///
/// NOTE: This only strips shebangs, not frontmatter!
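///
/// # Example
///
/// A minimal sketch, not a compilable doc test; `psess` is assumed to be an existing
/// `ParseSess`:
///
/// ```ignore (illustrative)
/// let stream = unwrap_or_emit_fatal(source_str_to_stream(
///     psess,
///     FileName::Custom("tokens".to_owned()),
///     "a + b".to_owned(),
///     None,
/// ));
/// ```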
pub fn source_str_to_stream(
    psess: &ParseSess,
    name: FileName,
    source: String,
    override_span: Option<Span>,
) -> Result<TokenStream, Vec<Diag<'_>>> {
    let source_file = psess.source_map().new_source_file(name, source);
    // FIXME(frontmatter): Consider stripping frontmatter in a future edition. We can't strip it
    // in the current edition since that would be breaking.
    // See also <https://github.com/rust-lang/rust/issues/145520>.
    // Alternatively, stop stripping shebangs here, too, if T-lang and crater approve.
    source_file_to_stream(psess, source_file, override_span, StripTokens::Shebang)
}

/// Given a source file, produces a sequence of token trees.
///
/// Returns any buffered errors from lexing the token stream.
fn source_file_to_stream<'psess>(
    psess: &'psess ParseSess,
    source_file: Arc<SourceFile>,
    override_span: Option<Span>,
    strip_tokens: StripTokens,
) -> Result<TokenStream, Vec<Diag<'psess>>> {
    let src = source_file.src.as_ref().unwrap_or_else(|| {
        psess.dcx().bug(format!(
            "cannot lex `source_file` without source: {}",
            psess.source_map().filename_for_diagnostics(&source_file.name)
        ));
    });

    lexer::lex_token_trees(psess, src.as_str(), source_file.start_pos, override_span, strip_tokens)
}

/// Runs the given subparser `f` on the tokens of the given `attr`'s item.
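///
/// # Example
///
/// A hedged sketch of a possible caller; `attr_tokens` is a hypothetical `TokenStream`
/// taken from a delimited attribute:
///
/// ```ignore (illustrative)
/// let expr = parse_in(psess, attr_tokens, "attribute contents", |p| p.parse_expr())?;
/// ```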
pub fn parse_in<'a, T>(
    psess: &'a ParseSess,
    tts: TokenStream,
    name: &'static str,
    mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
) -> PResult<'a, T> {
    let mut parser = Parser::new(psess, tts, Some(name));
    let result = f(&mut parser)?;
    if parser.token != token::Eof {
        parser.unexpected()?;
    }
    Ok(result)
}

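/// Produces a token stream for `item` by pretty-printing it and lexing the resulting source,
/// with every span overridden to `item.span`. Intended for use where the item's original
/// tokens are not available.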
pub fn fake_token_stream_for_item(psess: &ParseSess, item: &ast::Item) -> TokenStream {
    let source = pprust::item_to_string(item);
    let filename = FileName::macro_expansion_source_code(&source);
    unwrap_or_emit_fatal(source_str_to_stream(psess, filename, source, Some(item.span)))
}

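/// Like [`fake_token_stream_for_item`], but for a whole crate: pretty-prints `krate` and lexes
/// the resulting source, with every span overridden to the crate's inner span.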
pub fn fake_token_stream_for_crate(psess: &ParseSess, krate: &ast::Crate) -> TokenStream {
    let source = pprust::crate_to_string_for_macros(krate);
    let filename = FileName::macro_expansion_source_code(&source);
    unwrap_or_emit_fatal(source_str_to_stream(
        psess,
        filename,
        source,
        Some(krate.spans.inner_span),
    ))
}