// rustc_parse/lib.rs

//! The main parser interface.

// tidy-alphabetical-start
#![allow(internal_features)]
#![allow(rustc::diagnostic_outside_of_impl)]
#![allow(rustc::untranslatable_diagnostic)]
#![cfg_attr(doc, recursion_limit = "256")] // FIXME(nnethercote): will be removed by #124141
#![feature(array_windows)]
#![feature(assert_matches)]
#![feature(box_patterns)]
#![feature(debug_closure_helpers)]
#![feature(if_let_guard)]
#![feature(iter_intersperse)]
#![feature(let_chains)]
#![feature(string_from_utf8_lossy_owned)]
// tidy-alphabetical-end

use std::path::{Path, PathBuf};
use std::str::Utf8Error;
use std::sync::Arc;

use rustc_ast as ast;
use rustc_ast::tokenstream::TokenStream;
use rustc_ast::{AttrItem, Attribute, MetaItemInner, token};
use rustc_ast_pretty::pprust;
use rustc_errors::{Diag, EmissionGuarantee, FatalError, PResult, pluralize};
use rustc_session::parse::ParseSess;
use rustc_span::source_map::SourceMap;
use rustc_span::{FileName, SourceFile, Span};
pub use unicode_normalization::UNICODE_VERSION as UNICODE_NORMALIZATION_VERSION;

pub const MACRO_ARGUMENTS: Option<&str> = Some("macro arguments");

#[macro_use]
pub mod parser;
use parser::{Parser, make_unclosed_delims_error};
pub mod lexer;
pub mod validate_attr;

mod errors;

rustc_fluent_macro::fluent_messages! { "../messages.ftl" }

// Unwrap the result if `Ok`, otherwise emit the diagnostics and abort.
pub fn unwrap_or_emit_fatal<T>(expr: Result<T, Vec<Diag<'_>>>) -> T {
    match expr {
        Ok(expr) => expr,
        Err(errs) => {
            for err in errs {
                err.emit();
            }
            FatalError.raise()
        }
    }
}
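
// A minimal call-pattern sketch (illustrative only, not part of this module's
// API; `psess`, `name`, and `source` are assumed to be in scope at the call site):
//
//     let mut parser =
//         unwrap_or_emit_fatal(new_parser_from_source_str(psess, name, source));
//
// Every buffered `Diag` is emitted before `FatalError.raise()`, so no diagnostic
// is silently dropped (dropping an unemitted `Diag` would panic).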

/// Creates a new parser from a source string. On failure, the errors must be consumed via
/// `unwrap_or_emit_fatal`, `emit`, `cancel`, etc., otherwise a panic will occur when they are
/// dropped.
pub fn new_parser_from_source_str(
    psess: &ParseSess,
    name: FileName,
    source: String,
) -> Result<Parser<'_>, Vec<Diag<'_>>> {
    let source_file = psess.source_map().new_source_file(name, source);
    new_parser_from_source_file(psess, source_file)
}
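
// Example (a hedged sketch, not a doc-test; `psess` is assumed to be a
// `ParseSess` for the current session):
//
//     let src = "1 + 1".to_owned();
//     let name = FileName::anon_source_code(&src);
//     let mut parser =
//         unwrap_or_emit_fatal(new_parser_from_source_str(psess, name, src));
//     let expr = unwrap_or_emit_fatal(parser.parse_expr().map_err(|e| vec![e]));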

/// Creates a new parser from a filename. On failure, the errors must be consumed via
/// `unwrap_or_emit_fatal`, `emit`, `cancel`, etc., otherwise a panic will occur when they are
/// dropped.
///
/// If a span is given, that is used on an error as the source of the problem.
pub fn new_parser_from_file<'a>(
    psess: &'a ParseSess,
    path: &Path,
    sp: Option<Span>,
) -> Result<Parser<'a>, Vec<Diag<'a>>> {
    let sm = psess.source_map();
    let source_file = sm.load_file(path).unwrap_or_else(|e| {
        let msg = format!("couldn't read `{}`: {}", path.display(), e);
        let mut err = psess.dcx().struct_fatal(msg);
        if let Ok(contents) = std::fs::read(path)
            && let Err(utf8err) = String::from_utf8(contents.clone())
        {
            utf8_error(
                sm,
                &path.display().to_string(),
                sp,
                &mut err,
                utf8err.utf8_error(),
                &contents,
            );
        }
        if let Some(sp) = sp {
            err.span(sp);
        }
        err.emit();
    });
    new_parser_from_source_file(psess, source_file)
}
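
// Example (sketch): when loading an out-of-line `mod foo;`, the span of the
// `mod` item can be passed so that an unreadable or non-UTF-8 file is reported
// at the location that requested it (`psess` and `mod_span` are assumed here):
//
//     let mut parser = unwrap_or_emit_fatal(new_parser_from_file(
//         psess,
//         Path::new("src/foo.rs"),
//         Some(mod_span),
//     ));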

pub fn utf8_error<E: EmissionGuarantee>(
    sm: &SourceMap,
    path: &str,
    sp: Option<Span>,
    err: &mut Diag<'_, E>,
    utf8err: Utf8Error,
    contents: &[u8],
) {
    // The file exists, but it wasn't valid UTF-8.
    let start = utf8err.valid_up_to();
    let note = format!("invalid utf-8 at byte `{start}`");
    let msg = if let Some(len) = utf8err.error_len() {
        format!(
            "byte{s} `{bytes}` {are} not valid utf-8",
            bytes = if len == 1 {
                format!("{:?}", contents[start])
            } else {
                format!("{:?}", &contents[start..start + len])
            },
            s = pluralize!(len),
            are = if len == 1 { "is" } else { "are" },
        )
    } else {
        note.clone()
    };
    let contents = String::from_utf8_lossy(contents).to_string();
    let source = sm.new_source_file(PathBuf::from(path).into(), contents);
    let span = Span::with_root_ctxt(
        source.normalized_byte_pos(start as u32),
        source.normalized_byte_pos(start as u32),
    );
    if span.is_dummy() {
        err.note(note);
    } else {
        if sp.is_some() {
            err.span_note(span, msg);
        } else {
            err.span(span);
            err.span_label(span, msg);
        }
    }
}
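
// Worked example (illustrative): for `contents = b"fn main() {}\xFF"`,
// `utf8err.valid_up_to()` is 12 and `utf8err.error_len()` is `Some(1)`, so the
// message reads "byte `255` is not valid utf-8" and the span points at byte 12
// of the lossily re-decoded file registered in the source map above.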

/// Given a session and a `source_file`, return a parser. Returns any buffered errors from lexing
/// the initial token stream.
fn new_parser_from_source_file(
    psess: &ParseSess,
    source_file: Arc<SourceFile>,
) -> Result<Parser<'_>, Vec<Diag<'_>>> {
    let end_pos = source_file.end_position();
    let stream = source_file_to_stream(psess, source_file, None)?;
    let mut parser = Parser::new(psess, stream, None);
    if parser.token == token::Eof {
        parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt(), None);
    }
    Ok(parser)
}

pub fn source_str_to_stream(
    psess: &ParseSess,
    name: FileName,
    source: String,
    override_span: Option<Span>,
) -> Result<TokenStream, Vec<Diag<'_>>> {
    let source_file = psess.source_map().new_source_file(name, source);
    source_file_to_stream(psess, source_file, override_span)
}
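
// Example (sketch; `psess` is assumed to be in scope): lex a snippet straight
// into a `TokenStream` without constructing a `Parser`:
//
//     let stream = unwrap_or_emit_fatal(source_str_to_stream(
//         psess,
//         FileName::anon_source_code("foo(bar)"),
//         "foo(bar)".to_owned(),
//         None,
//     ));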

/// Given a source file, produces a sequence of token trees. Returns any buffered errors from
/// parsing the token stream.
fn source_file_to_stream<'psess>(
    psess: &'psess ParseSess,
    source_file: Arc<SourceFile>,
    override_span: Option<Span>,
) -> Result<TokenStream, Vec<Diag<'psess>>> {
    let src = source_file.src.as_ref().unwrap_or_else(|| {
        psess.dcx().bug(format!(
            "cannot lex `source_file` without source: {}",
            psess.source_map().filename_for_diagnostics(&source_file.name)
        ));
    });

    lexer::lex_token_trees(psess, src.as_str(), source_file.start_pos, override_span)
}

/// Runs the given subparser `f` on the tokens of the given `attr`'s item.
pub fn parse_in<'a, T>(
    psess: &'a ParseSess,
    tts: TokenStream,
    name: &'static str,
    mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
) -> PResult<'a, T> {
    let mut parser = Parser::new(psess, tts, Some(name));
    let result = f(&mut parser)?;
    if parser.token != token::Eof {
        parser.unexpected()?;
    }
    Ok(result)
}
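
// Example (sketch, mirroring `parse_cfg_attr` below): run a subparser over an
// attribute's delimited tokens and require that every token is consumed:
//
//     let parsed = parse_in(psess, tokens.clone(), "`cfg_attr` input", |p| p.parse_cfg_attr());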

pub fn fake_token_stream_for_item(psess: &ParseSess, item: &ast::Item) -> TokenStream {
    let source = pprust::item_to_string(item);
    let filename = FileName::macro_expansion_source_code(&source);
    unwrap_or_emit_fatal(source_str_to_stream(psess, filename, source, Some(item.span)))
}

pub fn fake_token_stream_for_crate(psess: &ParseSess, krate: &ast::Crate) -> TokenStream {
    let source = pprust::crate_to_string_for_macros(krate);
    let filename = FileName::macro_expansion_source_code(&source);
    unwrap_or_emit_fatal(source_str_to_stream(
        psess,
        filename,
        source,
        Some(krate.spans.inner_span),
    ))
}

pub fn parse_cfg_attr(
    cfg_attr: &Attribute,
    psess: &ParseSess,
) -> Option<(MetaItemInner, Vec<(AttrItem, Span)>)> {
    const CFG_ATTR_GRAMMAR_HELP: &str = "#[cfg_attr(condition, attribute, other_attribute, ...)]";
    const CFG_ATTR_NOTE_REF: &str = "for more information, visit \
        <https://doc.rust-lang.org/reference/conditional-compilation.html#the-cfg_attr-attribute>";

    match cfg_attr.get_normal_item().args {
        ast::AttrArgs::Delimited(ast::DelimArgs { dspan, delim, ref tokens })
            if !tokens.is_empty() =>
        {
            crate::validate_attr::check_cfg_attr_bad_delim(psess, dspan, delim);
            match parse_in(psess, tokens.clone(), "`cfg_attr` input", |p| p.parse_cfg_attr()) {
                Ok(r) => return Some(r),
                Err(e) => {
                    e.with_help(format!("the valid syntax is `{CFG_ATTR_GRAMMAR_HELP}`"))
                        .with_note(CFG_ATTR_NOTE_REF)
                        .emit();
                }
            }
        }
        _ => {
            psess.dcx().emit_err(errors::MalformedCfgAttr {
                span: cfg_attr.span,
                sugg: CFG_ATTR_GRAMMAR_HELP,
            });
        }
    }
    None
}
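
// Illustrative behavior (a sketch of the return shape, not exact output): for
// `#[cfg_attr(feature = "serde", derive(Serialize), inline)]` this returns the
// `feature = "serde"` condition as the `MetaItemInner` plus each trailing
// attribute (`derive(Serialize)`, `inline`) paired with its span; malformed
// input emits a diagnostic and returns `None`.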