rustc_parse/
lib.rs

1//! The main parser interface.
2
3// tidy-alphabetical-start
4#![allow(rustc::diagnostic_outside_of_impl)]
5#![allow(rustc::untranslatable_diagnostic)]
6#![feature(assert_matches)]
7#![feature(box_patterns)]
8#![feature(debug_closure_helpers)]
9#![feature(default_field_values)]
10#![feature(if_let_guard)]
11#![feature(iter_intersperse)]
12#![feature(iter_order_by)]
13#![recursion_limit = "256"]
14// tidy-alphabetical-end
15
16use std::path::{Path, PathBuf};
17use std::str::Utf8Error;
18use std::sync::Arc;
19
20use rustc_ast as ast;
21use rustc_ast::tokenstream::{DelimSpan, TokenStream};
22use rustc_ast::{AttrItem, Attribute, MetaItemInner, token};
23use rustc_ast_pretty::pprust;
24use rustc_errors::{Diag, EmissionGuarantee, FatalError, PResult, pluralize};
25use rustc_session::parse::ParseSess;
26use rustc_span::source_map::SourceMap;
27use rustc_span::{FileName, SourceFile, Span};
28pub use unicode_normalization::UNICODE_VERSION as UNICODE_NORMALIZATION_VERSION;
29
30pub const MACRO_ARGUMENTS: Option<&str> = Some("macro arguments");
31
32#[macro_use]
33pub mod parser;
34use parser::Parser;
35use rustc_ast::token::Delimiter;
36
37use crate::lexer::StripTokens;
38
39pub mod lexer;
40
41mod errors;
42
43rustc_fluent_macro::fluent_messages! { "../messages.ftl" }
44
45// Unwrap the result if `Ok`, otherwise emit the diagnostics and abort.
46pub fn unwrap_or_emit_fatal<T>(expr: Result<T, Vec<Diag<'_>>>) -> T {
47    match expr {
48        Ok(expr) => expr,
49        Err(errs) => {
50            for err in errs {
51                err.emit();
52            }
53            FatalError.raise()
54        }
55    }
56}
57
58/// Creates a new parser from a source string.
59///
60/// On failure, the errors must be consumed via `unwrap_or_emit_fatal`, `emit`, `cancel`,
61/// etc., otherwise a panic will occur when they are dropped.
62pub fn new_parser_from_source_str(
63    psess: &ParseSess,
64    name: FileName,
65    source: String,
66    strip_tokens: StripTokens,
67) -> Result<Parser<'_>, Vec<Diag<'_>>> {
68    let source_file = psess.source_map().new_source_file(name, source);
69    new_parser_from_source_file(psess, source_file, strip_tokens)
70}
71
72/// Creates a new parser from a filename. On failure, the errors must be consumed via
73/// `unwrap_or_emit_fatal`, `emit`, `cancel`, etc., otherwise a panic will occur when they are
74/// dropped.
75///
76/// If a span is given, that is used on an error as the source of the problem.
77pub fn new_parser_from_file<'a>(
78    psess: &'a ParseSess,
79    path: &Path,
80    strip_tokens: StripTokens,
81    sp: Option<Span>,
82) -> Result<Parser<'a>, Vec<Diag<'a>>> {
83    let sm = psess.source_map();
84    let source_file = sm.load_file(path).unwrap_or_else(|e| {
85        let msg = format!("couldn't read `{}`: {}", path.display(), e);
86        let mut err = psess.dcx().struct_fatal(msg);
87        if let Ok(contents) = std::fs::read(path)
88            && let Err(utf8err) = String::from_utf8(contents.clone())
89        {
90            utf8_error(
91                sm,
92                &path.display().to_string(),
93                sp,
94                &mut err,
95                utf8err.utf8_error(),
96                &contents,
97            );
98        }
99        if let Some(sp) = sp {
100            err.span(sp);
101        }
102        err.emit();
103    });
104    new_parser_from_source_file(psess, source_file, strip_tokens)
105}
106
107pub fn utf8_error<E: EmissionGuarantee>(
108    sm: &SourceMap,
109    path: &str,
110    sp: Option<Span>,
111    err: &mut Diag<'_, E>,
112    utf8err: Utf8Error,
113    contents: &[u8],
114) {
115    // The file exists, but it wasn't valid UTF-8.
116    let start = utf8err.valid_up_to();
117    let note = format!("invalid utf-8 at byte `{start}`");
118    let msg = if let Some(len) = utf8err.error_len() {
119        format!(
120            "byte{s} `{bytes}` {are} not valid utf-8",
121            bytes = if len == 1 {
122                format!("{:?}", contents[start])
123            } else {
124                format!("{:?}", &contents[start..start + len])
125            },
126            s = pluralize!(len),
127            are = if len == 1 { "is" } else { "are" },
128        )
129    } else {
130        note.clone()
131    };
132    let contents = String::from_utf8_lossy(contents).to_string();
133    let source = sm.new_source_file(PathBuf::from(path).into(), contents);
134    let span = Span::with_root_ctxt(
135        source.normalized_byte_pos(start as u32),
136        source.normalized_byte_pos(start as u32),
137    );
138    if span.is_dummy() {
139        err.note(note);
140    } else {
141        if sp.is_some() {
142            err.span_note(span, msg);
143        } else {
144            err.span(span);
145            err.span_label(span, msg);
146        }
147    }
148}
149
150/// Given a session and a `source_file`, return a parser. Returns any buffered errors from lexing
151/// the initial token stream.
152fn new_parser_from_source_file(
153    psess: &ParseSess,
154    source_file: Arc<SourceFile>,
155    strip_tokens: StripTokens,
156) -> Result<Parser<'_>, Vec<Diag<'_>>> {
157    let end_pos = source_file.end_position();
158    let stream = source_file_to_stream(psess, source_file, None, strip_tokens)?;
159    let mut parser = Parser::new(psess, stream, None);
160    if parser.token == token::Eof {
161        parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt(), None);
162    }
163    Ok(parser)
164}
165
166/// Given a source string, produces a sequence of token trees.
167///
168/// NOTE: This only strips shebangs, not frontmatter!
169pub fn source_str_to_stream(
170    psess: &ParseSess,
171    name: FileName,
172    source: String,
173    override_span: Option<Span>,
174) -> Result<TokenStream, Vec<Diag<'_>>> {
175    let source_file = psess.source_map().new_source_file(name, source);
176    // FIXME(frontmatter): Consider stripping frontmatter in a future edition. We can't strip them
177    // in the current edition since that would be breaking.
178    // See also <https://github.com/rust-lang/rust/issues/145520>.
179    // Alternatively, stop stripping shebangs here, too, if T-lang and crater approve.
180    source_file_to_stream(psess, source_file, override_span, StripTokens::Shebang)
181}
182
183/// Given a source file, produces a sequence of token trees.
184///
185/// Returns any buffered errors from parsing the token stream.
186fn source_file_to_stream<'psess>(
187    psess: &'psess ParseSess,
188    source_file: Arc<SourceFile>,
189    override_span: Option<Span>,
190    strip_tokens: StripTokens,
191) -> Result<TokenStream, Vec<Diag<'psess>>> {
192    let src = source_file.src.as_ref().unwrap_or_else(|| {
193        psess.dcx().bug(format!(
194            "cannot lex `source_file` without source: {}",
195            psess.source_map().filename_for_diagnostics(&source_file.name)
196        ));
197    });
198
199    lexer::lex_token_trees(psess, src.as_str(), source_file.start_pos, override_span, strip_tokens)
200}
201
202/// Runs the given subparser `f` on the tokens of the given `attr`'s item.
203pub fn parse_in<'a, T>(
204    psess: &'a ParseSess,
205    tts: TokenStream,
206    name: &'static str,
207    mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
208) -> PResult<'a, T> {
209    let mut parser = Parser::new(psess, tts, Some(name));
210    let result = f(&mut parser)?;
211    if parser.token != token::Eof {
212        parser.unexpected()?;
213    }
214    Ok(result)
215}
216
217pub fn fake_token_stream_for_item(psess: &ParseSess, item: &ast::Item) -> TokenStream {
218    let source = pprust::item_to_string(item);
219    let filename = FileName::macro_expansion_source_code(&source);
220    unwrap_or_emit_fatal(source_str_to_stream(psess, filename, source, Some(item.span)))
221}
222
223pub fn fake_token_stream_for_crate(psess: &ParseSess, krate: &ast::Crate) -> TokenStream {
224    let source = pprust::crate_to_string_for_macros(krate);
225    let filename = FileName::macro_expansion_source_code(&source);
226    unwrap_or_emit_fatal(source_str_to_stream(
227        psess,
228        filename,
229        source,
230        Some(krate.spans.inner_span),
231    ))
232}
233
234pub fn parse_cfg_attr(
235    cfg_attr: &Attribute,
236    psess: &ParseSess,
237) -> Option<(MetaItemInner, Vec<(AttrItem, Span)>)> {
238    const CFG_ATTR_GRAMMAR_HELP: &str = "#[cfg_attr(condition, attribute, other_attribute, ...)]";
239    const CFG_ATTR_NOTE_REF: &str = "for more information, visit \
240        <https://doc.rust-lang.org/reference/conditional-compilation.html#the-cfg_attr-attribute>";
241
242    match cfg_attr.get_normal_item().args {
243        ast::AttrArgs::Delimited(ast::DelimArgs { dspan, delim, ref tokens })
244            if !tokens.is_empty() =>
245        {
246            check_cfg_attr_bad_delim(psess, dspan, delim);
247            match parse_in(psess, tokens.clone(), "`cfg_attr` input", |p| p.parse_cfg_attr()) {
248                Ok(r) => return Some(r),
249                Err(e) => {
250                    e.with_help(format!("the valid syntax is `{CFG_ATTR_GRAMMAR_HELP}`"))
251                        .with_note(CFG_ATTR_NOTE_REF)
252                        .emit();
253                }
254            }
255        }
256        _ => {
257            psess.dcx().emit_err(errors::MalformedCfgAttr {
258                span: cfg_attr.span,
259                sugg: CFG_ATTR_GRAMMAR_HELP,
260            });
261        }
262    }
263    None
264}
265
266fn check_cfg_attr_bad_delim(psess: &ParseSess, span: DelimSpan, delim: Delimiter) {
267    if let Delimiter::Parenthesis = delim {
268        return;
269    }
270    psess.dcx().emit_err(errors::CfgAttrBadDelim {
271        span: span.entire(),
272        sugg: errors::MetaBadDelimSugg { open: span.open, close: span.close },
273    });
274}