1use diagnostics::make_errors_for_mismatched_closing_delims;
2use rustc_ast::ast::{self, AttrStyle};
3use rustc_ast::token::{self, CommentKind, Delimiter, IdentIsRaw, Token, TokenKind};
4use rustc_ast::tokenstream::TokenStream;
5use rustc_ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_control_chars};
6use rustc_errors::codes::*;
7use rustc_errors::{Applicability, Diag, DiagCtxtHandle, Diagnostic, StashKey};
8use rustc_lexer::{
9 Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_horizontal_whitespace,
10};
11use rustc_literal_escaper::{EscapeError, Mode, check_for_errors};
12use rustc_session::lint::builtin::{
13 RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
14 TEXT_DIRECTION_CODEPOINT_IN_COMMENT, TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
15};
16use rustc_session::parse::ParseSess;
17use rustc_span::{BytePos, Pos, Span, Symbol, sym};
18use tracing::debug;
19
20use crate::errors;
21use crate::lexer::diagnostics::TokenTreeDiagInfo;
22use crate::lexer::unicode_chars::UNICODE_ARRAY;
23
24mod diagnostics;
25mod tokentrees;
26mod unescape_error_reporting;
27mod unicode_chars;
28
29use unescape_error_reporting::{emit_unescape_error, escaped_char};
30
31#[cfg(target_pointer_width = "64")]
36const _: [(); 12] = [(); ::std::mem::size_of::<rustc_lexer::Token>()];rustc_data_structures::static_assert_size!(rustc_lexer::Token, 12);
37
/// Code points that are checked when an unknown character starts a token; a
/// match sets the `invisible` flag on the resulting "unknown token start" error.
const INVISIBLE_CHARACTERS: [char; 8] = [
    '\u{200b}', // ZERO WIDTH SPACE
    '\u{200c}', // ZERO WIDTH NON-JOINER
    '\u{2060}', // WORD JOINER
    '\u{2061}', // FUNCTION APPLICATION
    '\u{2062}', // INVISIBLE TIMES
    '\u{00ad}', // SOFT HYPHEN
    '\u{034f}', // COMBINING GRAPHEME JOINER
    '\u{061c}', // ARABIC LETTER MARK
];
41
42#[derive(#[automatically_derived]
impl ::core::clone::Clone for UnmatchedDelim {
#[inline]
fn clone(&self) -> UnmatchedDelim {
UnmatchedDelim {
found_delim: ::core::clone::Clone::clone(&self.found_delim),
found_span: ::core::clone::Clone::clone(&self.found_span),
unclosed_span: ::core::clone::Clone::clone(&self.unclosed_span),
candidate_span: ::core::clone::Clone::clone(&self.candidate_span),
}
}
}Clone, #[automatically_derived]
impl ::core::fmt::Debug for UnmatchedDelim {
#[inline]
fn fmt(&self, f: &mut ::core::fmt::Formatter) -> ::core::fmt::Result {
::core::fmt::Formatter::debug_struct_field4_finish(f,
"UnmatchedDelim", "found_delim", &self.found_delim, "found_span",
&self.found_span, "unclosed_span", &self.unclosed_span,
"candidate_span", &&self.candidate_span)
}
}Debug)]
43pub(crate) struct UnmatchedDelim {
44 pub found_delim: Option<Delimiter>,
45 pub found_span: Span,
46 pub unclosed_span: Option<Span>,
47 pub candidate_span: Option<Span>,
48}
49
/// Selects what `lex_token_trees` removes or specially recognises at the very
/// start of the source before regular lexing.
pub enum StripTokens {
    /// Strip a leading shebang line and let the cursor recognise frontmatter.
    ShebangAndFrontmatter,
    /// Strip a leading shebang line only; frontmatter is not recognised.
    Shebang,
    /// Strip nothing: no shebang removal and no frontmatter recognition.
    Nothing,
}
65
66pub(crate) fn lex_token_trees<'psess, 'src>(
67 psess: &'psess ParseSess,
68 mut src: &'src str,
69 mut start_pos: BytePos,
70 override_span: Option<Span>,
71 strip_tokens: StripTokens,
72) -> Result<TokenStream, Vec<Diag<'psess>>> {
73 match strip_tokens {
74 StripTokens::Shebang | StripTokens::ShebangAndFrontmatter => {
75 if let Some(shebang_len) = rustc_lexer::strip_shebang(src) {
76 src = &src[shebang_len..];
77 start_pos = start_pos + BytePos::from_usize(shebang_len);
78 }
79 }
80 StripTokens::Nothing => {}
81 }
82
83 let frontmatter_allowed = match strip_tokens {
84 StripTokens::ShebangAndFrontmatter => FrontmatterAllowed::Yes,
85 StripTokens::Shebang | StripTokens::Nothing => FrontmatterAllowed::No,
86 };
87
88 let cursor = Cursor::new(src, frontmatter_allowed);
89 let mut lexer = Lexer {
90 psess,
91 start_pos,
92 pos: start_pos,
93 src,
94 cursor,
95 override_span,
96 nbsp_is_whitespace: false,
97 last_lifetime: None,
98 token: Token::dummy(),
99 diag_info: TokenTreeDiagInfo::default(),
100 };
101 let res = lexer.lex_token_trees(false);
102
103 let mut unmatched_closing_delims: Vec<_> =
104 make_errors_for_mismatched_closing_delims(&lexer.diag_info.unmatched_delims, psess);
105
106 match res {
107 Ok((_open_spacing, stream)) => {
108 if unmatched_closing_delims.is_empty() {
109 Ok(stream)
110 } else {
111 Err(unmatched_closing_delims)
113 }
114 }
115 Err(errs) => {
116 unmatched_closing_delims.extend(errs);
119 Err(unmatched_closing_delims)
120 }
121 }
122}
123
124struct Lexer<'psess, 'src> {
125 psess: &'psess ParseSess,
126 start_pos: BytePos,
128 pos: BytePos,
130 src: &'src str,
132 cursor: Cursor<'src>,
134 override_span: Option<Span>,
135 nbsp_is_whitespace: bool,
139
140 last_lifetime: Option<Span>,
143
144 token: Token,
146
147 diag_info: TokenTreeDiagInfo,
148}
149
150impl<'psess, 'src> Lexer<'psess, 'src> {
151 fn dcx(&self) -> DiagCtxtHandle<'psess> {
152 self.psess.dcx()
153 }
154
155 fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span {
156 self.override_span.unwrap_or_else(|| Span::with_root_ctxt(lo, hi))
157 }
158
159 fn next_token_from_cursor(&mut self) -> (Token, bool) {
162 let mut preceded_by_whitespace = false;
163 let mut swallow_next_invalid = 0;
164 loop {
166 let str_before = self.cursor.as_str();
167 let token = self.cursor.advance_token();
168 let start = self.pos;
169 self.pos = self.pos + BytePos(token.len);
170
171 {
use ::tracing::__macro_support::Callsite as _;
static __CALLSITE: ::tracing::callsite::DefaultCallsite =
{
static META: ::tracing::Metadata<'static> =
{
::tracing_core::metadata::Metadata::new("event compiler/rustc_parse/src/lexer/mod.rs:171",
"rustc_parse::lexer", ::tracing::Level::DEBUG,
::tracing_core::__macro_support::Option::Some("compiler/rustc_parse/src/lexer/mod.rs"),
::tracing_core::__macro_support::Option::Some(171u32),
::tracing_core::__macro_support::Option::Some("rustc_parse::lexer"),
::tracing_core::field::FieldSet::new(&["message"],
::tracing_core::callsite::Identifier(&__CALLSITE)),
::tracing::metadata::Kind::EVENT)
};
::tracing::callsite::DefaultCallsite::new(&META)
};
let enabled =
::tracing::Level::DEBUG <= ::tracing::level_filters::STATIC_MAX_LEVEL
&&
::tracing::Level::DEBUG <=
::tracing::level_filters::LevelFilter::current() &&
{
let interest = __CALLSITE.interest();
!interest.is_never() &&
::tracing::__macro_support::__is_enabled(__CALLSITE.metadata(),
interest)
};
if enabled {
(|value_set: ::tracing::field::ValueSet|
{
let meta = __CALLSITE.metadata();
::tracing::Event::dispatch(meta, &value_set);
;
})({
#[allow(unused_imports)]
use ::tracing::field::{debug, display, Value};
let mut iter = __CALLSITE.metadata().fields().iter();
__CALLSITE.metadata().fields().value_set(&[(&::tracing::__macro_support::Iterator::next(&mut iter).expect("FieldSet corrupted (this is a bug)"),
::tracing::__macro_support::Option::Some(&format_args!("next_token: {0:?}({1:?})",
token.kind, self.str_from(start)) as &dyn Value))])
});
} else { ; }
};debug!("next_token: {:?}({:?})", token.kind, self.str_from(start));
172
173 if let rustc_lexer::TokenKind::Semi
174 | rustc_lexer::TokenKind::LineComment { .. }
175 | rustc_lexer::TokenKind::BlockComment { .. }
176 | rustc_lexer::TokenKind::CloseParen
177 | rustc_lexer::TokenKind::CloseBrace
178 | rustc_lexer::TokenKind::CloseBracket = token.kind
179 {
180 self.last_lifetime = None;
183 }
184
185 let kind = match token.kind {
189 rustc_lexer::TokenKind::LineComment { doc_style } => {
190 let Some(doc_style) = doc_style else {
192 self.lint_unicode_text_flow(start);
193 preceded_by_whitespace = true;
194 continue;
195 };
196
197 let content_start = start + BytePos(3);
199 let content = self.str_from(content_start);
200 self.lint_doc_comment_unicode_text_flow(start, content);
201 self.cook_doc_comment(content_start, content, CommentKind::Line, doc_style)
202 }
203 rustc_lexer::TokenKind::BlockComment { doc_style, terminated } => {
204 if !terminated {
205 self.report_unterminated_block_comment(start, doc_style);
206 }
207
208 let Some(doc_style) = doc_style else {
210 self.lint_unicode_text_flow(start);
211 preceded_by_whitespace = true;
212 continue;
213 };
214
215 let content_start = start + BytePos(3);
218 let content_end = self.pos - BytePos(if terminated { 2 } else { 0 });
219 let content = self.str_from_to(content_start, content_end);
220 self.lint_doc_comment_unicode_text_flow(start, content);
221 self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
222 }
223 rustc_lexer::TokenKind::Frontmatter { has_invalid_preceding_whitespace, invalid_infostring } => {
224 self.validate_frontmatter(start, has_invalid_preceding_whitespace, invalid_infostring);
225 preceded_by_whitespace = true;
226 continue;
227 }
228 rustc_lexer::TokenKind::Whitespace => {
229 preceded_by_whitespace = true;
230 continue;
231 }
232 rustc_lexer::TokenKind::Ident => self.ident(start),
233 rustc_lexer::TokenKind::RawIdent => {
234 let sym = nfc_normalize(self.str_from(start + BytePos(2)));
235 let span = self.mk_sp(start, self.pos);
236 self.psess.symbol_gallery.insert(sym, span);
237 if !sym.can_be_raw() {
238 self.dcx().emit_err(errors::CannotBeRawIdent { span, ident: sym });
239 }
240 self.psess.raw_identifier_spans.push(span);
241 token::Ident(sym, IdentIsRaw::Yes)
242 }
243 rustc_lexer::TokenKind::UnknownPrefix => {
244 self.report_unknown_prefix(start);
245 self.ident(start)
246 }
247 rustc_lexer::TokenKind::UnknownPrefixLifetime => {
248 self.report_unknown_prefix(start);
249 let lifetime_name = self.str_from(start);
253 self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1)));
254 let ident = Symbol::intern(lifetime_name);
255 token::Lifetime(ident, IdentIsRaw::No)
256 }
257 rustc_lexer::TokenKind::InvalidIdent
258 if !UNICODE_ARRAY.iter().any(|&(c, _, _)| {
261 let sym = self.str_from(start);
262 sym.chars().count() == 1 && c == sym.chars().next().unwrap()
263 }) =>
264 {
265 let sym = nfc_normalize(self.str_from(start));
266 let span = self.mk_sp(start, self.pos);
267 self.psess
268 .bad_unicode_identifiers
269 .borrow_mut()
270 .entry(sym)
271 .or_default()
272 .push(span);
273 token::Ident(sym, IdentIsRaw::No)
274 }
275 rustc_lexer::TokenKind::Literal {
278 kind: kind @ (LiteralKind::CStr { .. } | LiteralKind::RawCStr { .. }),
279 suffix_start: _,
280 } if !self.mk_sp(start, self.pos).edition().at_least_rust_2021() => {
281 let prefix_len = match kind {
282 LiteralKind::CStr { .. } => 1,
283 LiteralKind::RawCStr { .. } => 2,
284 _ => ::core::panicking::panic("internal error: entered unreachable code")unreachable!(),
285 };
286
287 let lit_start = start + BytePos(prefix_len);
290 self.pos = lit_start;
291 self.cursor = Cursor::new(&str_before[prefix_len as usize..], FrontmatterAllowed::No);
292 self.report_unknown_prefix(start);
293 let prefix_span = self.mk_sp(start, lit_start);
294 return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace);
295 }
296 rustc_lexer::TokenKind::GuardedStrPrefix => {
297 self.maybe_report_guarded_str(start, str_before)
298 }
299 rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
300 let suffix_start = start + BytePos(suffix_start);
301 let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
302 let suffix = if suffix_start < self.pos {
303 let string = self.str_from(suffix_start);
304 if string == "_" {
305 self.dcx().emit_err(errors::UnderscoreLiteralSuffix {
306 span: self.mk_sp(suffix_start, self.pos),
307 });
308 None
309 } else {
310 Some(Symbol::intern(string))
311 }
312 } else {
313 None
314 };
315 self.lint_literal_unicode_text_flow(symbol, kind, self.mk_sp(start, self.pos), "literal");
316 token::Literal(token::Lit { kind, symbol, suffix })
317 }
318 rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
319 let lifetime_name = nfc_normalize(self.str_from(start));
323 self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1)));
324 if starts_with_number {
325 let span = self.mk_sp(start, self.pos);
326 self.dcx()
327 .struct_err("lifetimes cannot start with a number")
328 .with_span(span)
329 .stash(span, StashKey::LifetimeIsChar);
330 }
331 token::Lifetime(lifetime_name, IdentIsRaw::No)
332 }
333 rustc_lexer::TokenKind::RawLifetime => {
334 self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1)));
335
336 let ident_start = start + BytePos(3);
337 let prefix_span = self.mk_sp(start, ident_start);
338
339 if prefix_span.at_least_rust_2021() {
340 if self.cursor.as_str().starts_with('\'') {
346 let lit_span = self.mk_sp(start, self.pos + BytePos(1));
347 let contents = self.str_from_to(start + BytePos(1), self.pos);
348 emit_unescape_error(
349 self.dcx(),
350 contents,
351 lit_span,
352 lit_span,
353 Mode::Char,
354 0..contents.len(),
355 EscapeError::MoreThanOneChar,
356 )
357 .expect("expected error");
358 }
359
360 let span = self.mk_sp(start, self.pos);
361
362 let lifetime_name_without_tick =
363 Symbol::intern(&self.str_from(ident_start));
364 if !lifetime_name_without_tick.can_be_raw() {
365 self.dcx().emit_err(
366 errors::CannotBeRawLifetime {
367 span,
368 ident: lifetime_name_without_tick
369 }
370 );
371 }
372
373 let mut lifetime_name =
375 String::with_capacity(lifetime_name_without_tick.as_str().len() + 1);
376 lifetime_name.push('\'');
377 lifetime_name += lifetime_name_without_tick.as_str();
378 let sym = nfc_normalize(&lifetime_name);
379
380 self.psess.raw_identifier_spans.push(span);
382
383 token::Lifetime(sym, IdentIsRaw::Yes)
384 } else {
385 self.psess.buffer_lint(
387 RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
388 prefix_span,
389 ast::CRATE_NODE_ID,
390 errors::RawPrefix {
391 label: prefix_span,
392 suggestion: prefix_span.shrink_to_hi()
393 },
394 );
395
396 let lt_start = start + BytePos(2);
398 self.pos = lt_start;
399 self.cursor = Cursor::new(&str_before[2 as usize..], FrontmatterAllowed::No);
400
401 let lifetime_name = nfc_normalize(self.str_from(start));
402 token::Lifetime(lifetime_name, IdentIsRaw::No)
403 }
404 }
405 rustc_lexer::TokenKind::Semi => token::Semi,
406 rustc_lexer::TokenKind::Comma => token::Comma,
407 rustc_lexer::TokenKind::Dot => token::Dot,
408 rustc_lexer::TokenKind::OpenParen => token::OpenParen,
409 rustc_lexer::TokenKind::CloseParen => token::CloseParen,
410 rustc_lexer::TokenKind::OpenBrace => token::OpenBrace,
411 rustc_lexer::TokenKind::CloseBrace => token::CloseBrace,
412 rustc_lexer::TokenKind::OpenBracket => token::OpenBracket,
413 rustc_lexer::TokenKind::CloseBracket => token::CloseBracket,
414 rustc_lexer::TokenKind::At => token::At,
415 rustc_lexer::TokenKind::Pound => token::Pound,
416 rustc_lexer::TokenKind::Tilde => token::Tilde,
417 rustc_lexer::TokenKind::Question => token::Question,
418 rustc_lexer::TokenKind::Colon => token::Colon,
419 rustc_lexer::TokenKind::Dollar => token::Dollar,
420 rustc_lexer::TokenKind::Eq => token::Eq,
421 rustc_lexer::TokenKind::Bang => token::Bang,
422 rustc_lexer::TokenKind::Lt => token::Lt,
423 rustc_lexer::TokenKind::Gt => token::Gt,
424 rustc_lexer::TokenKind::Minus => token::Minus,
425 rustc_lexer::TokenKind::And => token::And,
426 rustc_lexer::TokenKind::Or => token::Or,
427 rustc_lexer::TokenKind::Plus => token::Plus,
428 rustc_lexer::TokenKind::Star => token::Star,
429 rustc_lexer::TokenKind::Slash => token::Slash,
430 rustc_lexer::TokenKind::Caret => token::Caret,
431 rustc_lexer::TokenKind::Percent => token::Percent,
432
433 rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => {
434 if swallow_next_invalid > 0 {
436 swallow_next_invalid -= 1;
437 continue;
438 }
439 let mut it = self.str_from_to_end(start).chars();
440 let c = it.next().unwrap();
441 if c == '\u{00a0}' {
442 if self.nbsp_is_whitespace {
446 preceded_by_whitespace = true;
447 continue;
448 }
449 self.nbsp_is_whitespace = true;
450 }
451 let repeats = it.take_while(|c1| *c1 == c).count();
452 let (token, sugg) =
459 unicode_chars::check_for_substitution(self, start, c, repeats + 1);
460 self.dcx().emit_err(errors::UnknownTokenStart {
461 span: self.mk_sp(start, self.pos + Pos::from_usize(repeats * c.len_utf8())),
462 escaped: escaped_char(c),
463 sugg,
464 null: c == '\x00',
465 invisible: INVISIBLE_CHARACTERS.contains(&c),
466 repeat: if repeats > 0 {
467 swallow_next_invalid = repeats;
468 Some(errors::UnknownTokenRepeat { repeats })
469 } else {
470 None
471 },
472 });
473
474 if let Some(token) = token {
475 token
476 } else {
477 preceded_by_whitespace = true;
478 continue;
479 }
480 }
481 rustc_lexer::TokenKind::Eof => token::Eof,
482 };
483 let span = self.mk_sp(start, self.pos);
484 return (Token::new(kind, span), preceded_by_whitespace);
485 }
486 }
487
488 fn ident(&self, start: BytePos) -> TokenKind {
489 let sym = nfc_normalize(self.str_from(start));
490 let span = self.mk_sp(start, self.pos);
491 self.psess.symbol_gallery.insert(sym, span);
492 token::Ident(sym, IdentIsRaw::No)
493 }
494
495 fn lint_unicode_text_flow(&self, start: BytePos) {
498 let content_start = start + BytePos(2);
500 let content = self.str_from(content_start);
501 if contains_text_flow_control_chars(content) {
502 let span = self.mk_sp(start, self.pos);
503 let content = content.to_string();
504 self.psess.dyn_buffer_lint(
505 TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
506 span,
507 ast::CRATE_NODE_ID,
508 move |dcx, level| {
509 let spans: Vec<_> = content
510 .char_indices()
511 .filter_map(|(i, c)| {
512 TEXT_FLOW_CONTROL_CHARS.contains(&c).then(|| {
513 let lo = span.lo() + BytePos(2 + i as u32);
514 (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
515 })
516 })
517 .collect();
518 let characters = spans
519 .iter()
520 .map(|&(c, span)| errors::UnicodeCharNoteSub {
521 span,
522 c_debug: ::alloc::__export::must_use({
::alloc::fmt::format(format_args!("{0:?}", c))
})format!("{c:?}"),
523 })
524 .collect();
525 let suggestions =
526 (!spans.is_empty()).then_some(errors::UnicodeTextFlowSuggestion {
527 spans: spans.iter().map(|(_c, span)| *span).collect(),
528 });
529
530 errors::UnicodeTextFlow {
531 comment_span: span,
532 characters,
533 suggestions,
534 num_codepoints: spans.len(),
535 }
536 .into_diag(dcx, level)
537 },
538 );
539 }
540 }
541
542 fn lint_doc_comment_unicode_text_flow(&mut self, start: BytePos, content: &str) {
543 if contains_text_flow_control_chars(content) {
544 self.report_text_direction_codepoint(
545 content,
546 self.mk_sp(start, self.pos),
547 0,
548 false,
549 true,
550 "doc comment",
551 );
552 }
553 }
554
555 fn lint_literal_unicode_text_flow(
556 &mut self,
557 text: Symbol,
558 lit_kind: token::LitKind,
559 span: Span,
560 label: &'static str,
561 ) {
562 if !contains_text_flow_control_chars(text.as_str()) {
563 return;
564 }
565 let (padding, point_at_inner_spans) = match lit_kind {
566 token::LitKind::Str | token::LitKind::Char => (1, true),
568 token::LitKind::CStr => (2, true),
570 token::LitKind::StrRaw(n) => (n as u32 + 2, true),
572 token::LitKind::CStrRaw(n) => (n as u32 + 3, true),
574 token::LitKind::Err(_) => return,
576 _ => (0, false),
578 };
579 self.report_text_direction_codepoint(
580 text.as_str(),
581 span,
582 padding,
583 point_at_inner_spans,
584 false,
585 label,
586 );
587 }
588
589 fn report_text_direction_codepoint(
590 &self,
591 text: &str,
592 span: Span,
593 padding: u32,
594 point_at_inner_spans: bool,
595 is_doc_comment: bool,
596 label: &str,
597 ) {
598 let spans: Vec<_> = text
600 .char_indices()
601 .filter_map(|(i, c)| {
602 TEXT_FLOW_CONTROL_CHARS.contains(&c).then(|| {
603 let lo = span.lo() + BytePos(i as u32 + padding);
604 (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
605 })
606 })
607 .collect();
608
609 let label = label.to_string();
610 let count = spans.len();
611 let labels = point_at_inner_spans
612 .then_some(errors::HiddenUnicodeCodepointsDiagLabels { spans: spans.clone() });
613 let sub = if point_at_inner_spans && !spans.is_empty() {
614 errors::HiddenUnicodeCodepointsDiagSub::Escape { spans }
615 } else {
616 errors::HiddenUnicodeCodepointsDiagSub::NoEscape { spans, is_doc_comment }
617 };
618
619 self.psess.buffer_lint(
620 TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
621 span,
622 ast::CRATE_NODE_ID,
623 errors::HiddenUnicodeCodepointsDiag { label, count, span_label: span, labels, sub },
624 );
625 }
626
627 fn validate_frontmatter(
628 &self,
629 start: BytePos,
630 has_invalid_preceding_whitespace: bool,
631 invalid_infostring: bool,
632 ) {
633 let s = self.str_from(start);
634 let real_start = s.find("---").unwrap();
635 let frontmatter_opening_pos = BytePos(real_start as u32) + start;
636 let real_s = &s[real_start..];
637 let within = real_s.trim_start_matches('-');
638 let len_opening = real_s.len() - within.len();
639
640 let frontmatter_opening_end_pos = frontmatter_opening_pos + BytePos(len_opening as u32);
641 if has_invalid_preceding_whitespace {
642 let line_start =
643 BytePos(s[..real_start].rfind("\n").map_or(0, |i| i as u32 + 1)) + start;
644 let span = self.mk_sp(line_start, frontmatter_opening_end_pos);
645 let label_span = self.mk_sp(line_start, frontmatter_opening_pos);
646 self.dcx().emit_err(errors::FrontmatterInvalidOpeningPrecedingWhitespace {
647 span,
648 note_span: label_span,
649 });
650 }
651
652 let line_end = real_s.find('\n').unwrap_or(real_s.len());
653 if invalid_infostring {
654 let span = self.mk_sp(
655 frontmatter_opening_end_pos,
656 frontmatter_opening_pos + BytePos(line_end as u32),
657 );
658 self.dcx().emit_err(errors::FrontmatterInvalidInfostring { span });
659 }
660
661 let last_line_start = real_s.rfind('\n').map_or(line_end, |i| i + 1);
662
663 let content = &real_s[line_end..last_line_start];
664 if let Some(cr_offset) = content.find('\r') {
665 let cr_pos = start + BytePos((real_start + line_end + cr_offset) as u32);
666 let span = self.mk_sp(cr_pos, cr_pos + BytePos(1 as u32));
667 self.dcx().emit_err(errors::BareCrFrontmatter { span });
668 }
669
670 let last_line = &real_s[last_line_start..];
671 let last_line_trimmed = last_line.trim_start_matches(is_horizontal_whitespace);
672 let last_line_start_pos = frontmatter_opening_pos + BytePos(last_line_start as u32);
673
674 let frontmatter_span = self.mk_sp(frontmatter_opening_pos, self.pos);
675 self.psess.gated_spans.gate(sym::frontmatter, frontmatter_span);
676
677 if !last_line_trimmed.starts_with("---") {
678 let label_span = self.mk_sp(frontmatter_opening_pos, frontmatter_opening_end_pos);
679 self.dcx().emit_err(errors::FrontmatterUnclosed {
680 span: frontmatter_span,
681 note_span: label_span,
682 });
683 return;
684 }
685
686 if last_line_trimmed.len() != last_line.len() {
687 let line_end = last_line_start_pos + BytePos(last_line.len() as u32);
688 let span = self.mk_sp(last_line_start_pos, line_end);
689 let whitespace_end =
690 last_line_start_pos + BytePos((last_line.len() - last_line_trimmed.len()) as u32);
691 let label_span = self.mk_sp(last_line_start_pos, whitespace_end);
692 self.dcx().emit_err(errors::FrontmatterInvalidClosingPrecedingWhitespace {
693 span,
694 note_span: label_span,
695 });
696 }
697
698 let rest = last_line_trimmed.trim_start_matches('-');
699 let len_close = last_line_trimmed.len() - rest.len();
700 if len_close != len_opening {
701 let span = self.mk_sp(frontmatter_opening_pos, self.pos);
702 let opening = self.mk_sp(frontmatter_opening_pos, frontmatter_opening_end_pos);
703 let last_line_close_pos = last_line_start_pos + BytePos(len_close as u32);
704 let close = self.mk_sp(last_line_start_pos, last_line_close_pos);
705 self.dcx().emit_err(errors::FrontmatterLengthMismatch {
706 span,
707 opening,
708 close,
709 len_opening,
710 len_close,
711 });
712 }
713
714 if u8::try_from(len_opening).is_err() {
716 self.dcx().emit_err(errors::FrontmatterTooManyDashes { len_opening });
717 }
718
719 if !rest.trim_matches(is_horizontal_whitespace).is_empty() {
720 let span = self.mk_sp(last_line_start_pos, self.pos);
721 self.dcx().emit_err(errors::FrontmatterExtraCharactersAfterClose { span });
722 }
723 }
724
725 fn cook_doc_comment(
726 &self,
727 content_start: BytePos,
728 content: &str,
729 comment_kind: CommentKind,
730 doc_style: DocStyle,
731 ) -> TokenKind {
732 if content.contains('\r') {
733 for (idx, _) in content.char_indices().filter(|&(_, c)| c == '\r') {
734 let span = self.mk_sp(
735 content_start + BytePos(idx as u32),
736 content_start + BytePos(idx as u32 + 1),
737 );
738 let block = #[allow(non_exhaustive_omitted_patterns)] match comment_kind {
CommentKind::Block => true,
_ => false,
}matches!(comment_kind, CommentKind::Block);
739 self.dcx().emit_err(errors::CrDocComment { span, block });
740 }
741 }
742
743 let attr_style = match doc_style {
744 DocStyle::Outer => AttrStyle::Outer,
745 DocStyle::Inner => AttrStyle::Inner,
746 };
747
748 token::DocComment(comment_kind, attr_style, Symbol::intern(content))
749 }
750
751 fn cook_lexer_literal(
752 &self,
753 start: BytePos,
754 end: BytePos,
755 kind: rustc_lexer::LiteralKind,
756 ) -> (token::LitKind, Symbol) {
757 match kind {
758 rustc_lexer::LiteralKind::Char { terminated } => {
759 if !terminated {
760 let mut err = self
761 .dcx()
762 .struct_span_fatal(self.mk_sp(start, end), "unterminated character literal")
763 .with_code(E0762);
764 if let Some(lt_sp) = self.last_lifetime {
765 err.multipart_suggestion(
766 "if you meant to write a string literal, use double quotes",
767 ::alloc::boxed::box_assume_init_into_vec_unsafe(::alloc::intrinsics::write_box_via_move(::alloc::boxed::Box::new_uninit(),
[(lt_sp, "\"".to_string()),
(self.mk_sp(start, start + BytePos(1)), "\"".to_string())]))vec![
768 (lt_sp, "\"".to_string()),
769 (self.mk_sp(start, start + BytePos(1)), "\"".to_string()),
770 ],
771 Applicability::MaybeIncorrect,
772 );
773 }
774 err.emit()
775 }
776 self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) }
778 rustc_lexer::LiteralKind::Byte { terminated } => {
779 if !terminated {
780 self.dcx()
781 .struct_span_fatal(
782 self.mk_sp(start + BytePos(1), end),
783 "unterminated byte constant",
784 )
785 .with_code(E0763)
786 .emit()
787 }
788 self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) }
790 rustc_lexer::LiteralKind::Str { terminated } => {
791 if !terminated {
792 self.dcx()
793 .struct_span_fatal(
794 self.mk_sp(start, end),
795 "unterminated double quote string",
796 )
797 .with_code(E0765)
798 .emit()
799 }
800 self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) }
802 rustc_lexer::LiteralKind::ByteStr { terminated } => {
803 if !terminated {
804 self.dcx()
805 .struct_span_fatal(
806 self.mk_sp(start + BytePos(1), end),
807 "unterminated double quote byte string",
808 )
809 .with_code(E0766)
810 .emit()
811 }
812 self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1)
813 }
815 rustc_lexer::LiteralKind::CStr { terminated } => {
816 if !terminated {
817 self.dcx()
818 .struct_span_fatal(
819 self.mk_sp(start + BytePos(1), end),
820 "unterminated C string",
821 )
822 .with_code(E0767)
823 .emit()
824 }
825 self.cook_quoted(token::CStr, Mode::CStr, start, end, 2, 1) }
827 rustc_lexer::LiteralKind::RawStr { n_hashes } => {
828 if let Some(n_hashes) = n_hashes {
829 let n = u32::from(n_hashes);
830 let kind = token::StrRaw(n_hashes);
831 self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n)
832 } else {
834 self.report_raw_str_error(start, 1);
835 }
836 }
837 rustc_lexer::LiteralKind::RawByteStr { n_hashes } => {
838 if let Some(n_hashes) = n_hashes {
839 let n = u32::from(n_hashes);
840 let kind = token::ByteStrRaw(n_hashes);
841 self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n)
842 } else {
844 self.report_raw_str_error(start, 2);
845 }
846 }
847 rustc_lexer::LiteralKind::RawCStr { n_hashes } => {
848 if let Some(n_hashes) = n_hashes {
849 let n = u32::from(n_hashes);
850 let kind = token::CStrRaw(n_hashes);
851 self.cook_quoted(kind, Mode::RawCStr, start, end, 3 + n, 1 + n)
852 } else {
854 self.report_raw_str_error(start, 2);
855 }
856 }
857 rustc_lexer::LiteralKind::Int { base, empty_int } => {
858 let mut kind = token::Integer;
859 if empty_int {
860 let span = self.mk_sp(start, end);
861 let guar = self.dcx().emit_err(errors::NoDigitsLiteral { span });
862 kind = token::Err(guar);
863 } else if #[allow(non_exhaustive_omitted_patterns)] match base {
Base::Binary | Base::Octal => true,
_ => false,
}matches!(base, Base::Binary | Base::Octal) {
864 let base = base as u32;
865 let s = self.str_from_to(start + BytePos(2), end);
866 for (idx, c) in s.char_indices() {
867 let span = self.mk_sp(
868 start + BytePos::from_usize(2 + idx),
869 start + BytePos::from_usize(2 + idx + c.len_utf8()),
870 );
871 if c != '_' && c.to_digit(base).is_none() {
872 let guar =
873 self.dcx().emit_err(errors::InvalidDigitLiteral { span, base });
874 kind = token::Err(guar);
875 }
876 }
877 }
878 (kind, self.symbol_from_to(start, end))
879 }
880 rustc_lexer::LiteralKind::Float { base, empty_exponent } => {
881 let mut kind = token::Float;
882 if empty_exponent {
883 let span = self.mk_sp(start, self.pos);
884 let guar = self.dcx().emit_err(errors::EmptyExponentFloat { span });
885 kind = token::Err(guar);
886 }
887 let base = match base {
888 Base::Hexadecimal => Some("hexadecimal"),
889 Base::Octal => Some("octal"),
890 Base::Binary => Some("binary"),
891 _ => None,
892 };
893 if let Some(base) = base {
894 let span = self.mk_sp(start, end);
895 let guar =
896 self.dcx().emit_err(errors::FloatLiteralUnsupportedBase { span, base });
897 kind = token::Err(guar)
898 }
899 (kind, self.symbol_from_to(start, end))
900 }
901 }
902 }
903
904 #[inline]
905 fn src_index(&self, pos: BytePos) -> usize {
906 (pos - self.start_pos).to_usize()
907 }
908
909 fn str_from(&self, start: BytePos) -> &'src str {
912 self.str_from_to(start, self.pos)
913 }
914
915 fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol {
917 {
use ::tracing::__macro_support::Callsite as _;
static __CALLSITE: ::tracing::callsite::DefaultCallsite =
{
static META: ::tracing::Metadata<'static> =
{
::tracing_core::metadata::Metadata::new("event compiler/rustc_parse/src/lexer/mod.rs:917",
"rustc_parse::lexer", ::tracing::Level::DEBUG,
::tracing_core::__macro_support::Option::Some("compiler/rustc_parse/src/lexer/mod.rs"),
::tracing_core::__macro_support::Option::Some(917u32),
::tracing_core::__macro_support::Option::Some("rustc_parse::lexer"),
::tracing_core::field::FieldSet::new(&["message"],
::tracing_core::callsite::Identifier(&__CALLSITE)),
::tracing::metadata::Kind::EVENT)
};
::tracing::callsite::DefaultCallsite::new(&META)
};
let enabled =
::tracing::Level::DEBUG <= ::tracing::level_filters::STATIC_MAX_LEVEL
&&
::tracing::Level::DEBUG <=
::tracing::level_filters::LevelFilter::current() &&
{
let interest = __CALLSITE.interest();
!interest.is_never() &&
::tracing::__macro_support::__is_enabled(__CALLSITE.metadata(),
interest)
};
if enabled {
(|value_set: ::tracing::field::ValueSet|
{
let meta = __CALLSITE.metadata();
::tracing::Event::dispatch(meta, &value_set);
;
})({
#[allow(unused_imports)]
use ::tracing::field::{debug, display, Value};
let mut iter = __CALLSITE.metadata().fields().iter();
__CALLSITE.metadata().fields().value_set(&[(&::tracing::__macro_support::Iterator::next(&mut iter).expect("FieldSet corrupted (this is a bug)"),
::tracing::__macro_support::Option::Some(&format_args!("taking an ident from {0:?} to {1:?}",
start, end) as &dyn Value))])
});
} else { ; }
};debug!("taking an ident from {:?} to {:?}", start, end);
918 Symbol::intern(self.str_from_to(start, end))
919 }
920
921 fn str_from_to(&self, start: BytePos, end: BytePos) -> &'src str {
923 &self.src[self.src_index(start)..self.src_index(end)]
924 }
925
926 fn str_from_to_end(&self, start: BytePos) -> &'src str {
928 &self.src[self.src_index(start)..]
929 }
930
931 fn report_raw_str_error(&self, start: BytePos, prefix_len: u32) -> ! {
932 match rustc_lexer::validate_raw_str(self.str_from(start), prefix_len) {
933 Err(RawStrError::InvalidStarter { bad_char }) => {
934 self.report_non_started_raw_string(start, bad_char)
935 }
936 Err(RawStrError::NoTerminator { expected, found, possible_terminator_offset }) => self
937 .report_unterminated_raw_string(start, expected, possible_terminator_offset, found),
938 Err(RawStrError::TooManyDelimiters { found }) => {
939 self.report_too_many_hashes(start, found)
940 }
941 Ok(()) => {
::core::panicking::panic_fmt(format_args!("no error found for supposedly invalid raw string literal"));
}panic!("no error found for supposedly invalid raw string literal"),
942 }
943 }
944
945 fn report_non_started_raw_string(&self, start: BytePos, bad_char: char) -> ! {
946 self.dcx()
947 .struct_span_fatal(
948 self.mk_sp(start, self.pos),
949 ::alloc::__export::must_use({
::alloc::fmt::format(format_args!("found invalid character; only `#` is allowed in raw string delimitation: {0}",
escaped_char(bad_char)))
})format!(
950 "found invalid character; only `#` is allowed in raw string delimitation: {}",
951 escaped_char(bad_char)
952 ),
953 )
954 .emit()
955 }
956
957 fn report_unterminated_raw_string(
958 &self,
959 start: BytePos,
960 n_hashes: u32,
961 possible_offset: Option<u32>,
962 found_terminators: u32,
963 ) -> ! {
964 let mut err =
965 self.dcx().struct_span_fatal(self.mk_sp(start, start), "unterminated raw string");
966 err.code(E0748);
967 err.span_label(self.mk_sp(start, start), "unterminated raw string");
968
969 if n_hashes > 0 {
970 err.note(::alloc::__export::must_use({
::alloc::fmt::format(format_args!("this raw string should be terminated with `\"{0}`",
"#".repeat(n_hashes as usize)))
})format!(
971 "this raw string should be terminated with `\"{}`",
972 "#".repeat(n_hashes as usize)
973 ));
974 }
975
976 if let Some(possible_offset) = possible_offset {
977 let lo = start + BytePos(possible_offset);
978 let hi = lo + BytePos(found_terminators);
979 let span = self.mk_sp(lo, hi);
980 err.span_suggestion(
981 span,
982 "consider terminating the string here",
983 "#".repeat(n_hashes as usize),
984 Applicability::MaybeIncorrect,
985 );
986 }
987
988 err.emit()
989 }
990
991 fn report_unterminated_block_comment(&self, start: BytePos, doc_style: Option<DocStyle>) {
992 let msg = match doc_style {
993 Some(_) => "unterminated block doc-comment",
994 None => "unterminated block comment",
995 };
996 let last_bpos = self.pos;
997 let mut err = self.dcx().struct_span_fatal(self.mk_sp(start, last_bpos), msg);
998 err.code(E0758);
999 let mut nested_block_comment_open_idxs = ::alloc::vec::Vec::new()vec![];
1000 let mut last_nested_block_comment_idxs = None;
1001 let mut content_chars = self.str_from(start).char_indices().peekable();
1002
1003 while let Some((idx, current_char)) = content_chars.next() {
1004 match content_chars.peek() {
1005 Some((_, '*')) if current_char == '/' => {
1006 nested_block_comment_open_idxs.push(idx);
1007 }
1008 Some((_, '/')) if current_char == '*' => {
1009 last_nested_block_comment_idxs =
1010 nested_block_comment_open_idxs.pop().map(|open_idx| (open_idx, idx));
1011 }
1012 _ => {}
1013 };
1014 }
1015
1016 if let Some((nested_open_idx, nested_close_idx)) = last_nested_block_comment_idxs {
1017 err.span_label(self.mk_sp(start, start + BytePos(2)), msg)
1018 .span_label(
1019 self.mk_sp(
1020 start + BytePos(nested_open_idx as u32),
1021 start + BytePos(nested_open_idx as u32 + 2),
1022 ),
1023 "...as last nested comment starts here, maybe you want to close this instead?",
1024 )
1025 .span_label(
1026 self.mk_sp(
1027 start + BytePos(nested_close_idx as u32),
1028 start + BytePos(nested_close_idx as u32 + 2),
1029 ),
1030 "...and last nested comment terminates here.",
1031 );
1032 }
1033
1034 err.emit();
1035 }
1036
1037 fn report_unknown_prefix(&self, start: BytePos) {
1042 let prefix_span = self.mk_sp(start, self.pos);
1043 let prefix = self.str_from_to(start, self.pos);
1044 let expn_data = prefix_span.ctxt().outer_expn_data();
1045
1046 if expn_data.edition.at_least_rust_2021() {
1047 let sugg = if prefix == "rb" {
1049 Some(errors::UnknownPrefixSugg::UseBr(prefix_span))
1050 } else if prefix == "rc" {
1051 Some(errors::UnknownPrefixSugg::UseCr(prefix_span))
1052 } else if expn_data.is_root() {
1053 if self.cursor.first() == '\''
1054 && let Some(start) = self.last_lifetime
1055 && self.cursor.third() != '\''
1056 && let end = self.mk_sp(self.pos, self.pos + BytePos(1))
1057 && !self.psess.source_map().is_multiline(start.until(end))
1058 {
1059 Some(errors::UnknownPrefixSugg::MeantStr { start, end })
1063 } else {
1064 Some(errors::UnknownPrefixSugg::Whitespace(prefix_span.shrink_to_hi()))
1065 }
1066 } else {
1067 None
1068 };
1069 self.dcx().emit_err(errors::UnknownPrefix { span: prefix_span, prefix, sugg });
1070 } else {
1071 self.psess.buffer_lint(
1073 RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
1074 prefix_span,
1075 ast::CRATE_NODE_ID,
1076 errors::ReservedPrefix {
1077 label: prefix_span,
1078 suggestion: prefix_span.shrink_to_hi(),
1079 prefix: prefix.to_string(),
1080 },
1081 );
1082 }
1083 }
1084
1085 fn maybe_report_guarded_str(&mut self, start: BytePos, str_before: &'src str) -> TokenKind {
1092 let span = self.mk_sp(start, self.pos);
1093 let edition2024 = span.edition().at_least_rust_2024();
1094
1095 let space_pos = start + BytePos(1);
1096 let space_span = self.mk_sp(space_pos, space_pos);
1097
1098 let mut cursor = Cursor::new(str_before, FrontmatterAllowed::No);
1099
1100 let (is_string, span, unterminated) = match cursor.guarded_double_quoted_string() {
1101 Some(rustc_lexer::GuardedStr { n_hashes, terminated, token_len }) => {
1102 let end = start + BytePos(token_len);
1103 let span = self.mk_sp(start, end);
1104 let str_start = start + BytePos(n_hashes);
1105
1106 if edition2024 {
1107 self.cursor = cursor;
1108 self.pos = end;
1109 }
1110
1111 let unterminated = if terminated { None } else { Some(str_start) };
1112
1113 (true, span, unterminated)
1114 }
1115 None => {
1116 if true {
match (&self.str_from_to(start, start + BytePos(2)), &"##") {
(left_val, right_val) => {
if !(*left_val == *right_val) {
let kind = ::core::panicking::AssertKind::Eq;
::core::panicking::assert_failed(kind, &*left_val,
&*right_val, ::core::option::Option::None);
}
}
};
};debug_assert_eq!(self.str_from_to(start, start + BytePos(2)), "##");
1118
1119 (false, span, None)
1120 }
1121 };
1122 if edition2024 {
1123 if let Some(str_start) = unterminated {
1124 self.dcx()
1126 .struct_span_fatal(
1127 self.mk_sp(str_start, self.pos),
1128 "unterminated double quote string",
1129 )
1130 .with_code(E0765)
1131 .emit()
1132 }
1133
1134 let sugg = if span.from_expansion() {
1135 None
1136 } else {
1137 Some(errors::GuardedStringSugg(space_span))
1138 };
1139
1140 let err = if is_string {
1142 self.dcx().emit_err(errors::ReservedString { span, sugg })
1143 } else {
1144 self.dcx().emit_err(errors::ReservedMultihash { span, sugg })
1145 };
1146
1147 token::Literal(token::Lit {
1148 kind: token::Err(err),
1149 symbol: self.symbol_from_to(start, self.pos),
1150 suffix: None,
1151 })
1152 } else {
1153 self.psess.dyn_buffer_lint(
1155 RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
1156 span,
1157 ast::CRATE_NODE_ID,
1158 move |dcx, level| {
1159 if is_string {
1160 errors::ReservedStringLint { suggestion: space_span }.into_diag(dcx, level)
1161 } else {
1162 errors::ReservedMultihashLint { suggestion: space_span }
1163 .into_diag(dcx, level)
1164 }
1165 },
1166 );
1167
1168 self.pos = start + BytePos(1);
1171 self.cursor = Cursor::new(&str_before[1..], FrontmatterAllowed::No);
1172 token::Pound
1173 }
1174 }
1175
1176 fn report_too_many_hashes(&self, start: BytePos, num: u32) -> ! {
1177 self.dcx().emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
1178 }
1179
1180 fn cook_quoted(
1181 &self,
1182 mut kind: token::LitKind,
1183 mode: Mode,
1184 start: BytePos,
1185 end: BytePos,
1186 prefix_len: u32,
1187 postfix_len: u32,
1188 ) -> (token::LitKind, Symbol) {
1189 let content_start = start + BytePos(prefix_len);
1190 let content_end = end - BytePos(postfix_len);
1191 let lit_content = self.str_from_to(content_start, content_end);
1192 check_for_errors(lit_content, mode, |range, err| {
1193 let span_with_quotes = self.mk_sp(start, end);
1194 let (start, end) = (range.start as u32, range.end as u32);
1195 let lo = content_start + BytePos(start);
1196 let hi = lo + BytePos(end - start);
1197 let span = self.mk_sp(lo, hi);
1198 let is_fatal = err.is_fatal();
1199 if let Some(guar) = emit_unescape_error(
1200 self.dcx(),
1201 lit_content,
1202 span_with_quotes,
1203 span,
1204 mode,
1205 range,
1206 err,
1207 ) {
1208 if !is_fatal { ::core::panicking::panic("assertion failed: is_fatal") };assert!(is_fatal);
1209 kind = token::Err(guar);
1210 }
1211 });
1212
1213 let sym = if !#[allow(non_exhaustive_omitted_patterns)] match kind {
token::Err(_) => true,
_ => false,
}matches!(kind, token::Err(_)) {
1216 Symbol::intern(lit_content)
1217 } else {
1218 self.symbol_from_to(start, end)
1219 };
1220 (kind, sym)
1221 }
1222}
1223
1224pub fn nfc_normalize(string: &str) -> Symbol {
1225 use unicode_normalization::{IsNormalized, UnicodeNormalization, is_nfc_quick};
1226 match is_nfc_quick(string.chars()) {
1227 IsNormalized::Yes => Symbol::intern(string),
1228 _ => {
1229 let normalized_str: String = string.chars().nfc().collect();
1230 Symbol::intern(&normalized_str)
1231 }
1232 }
1233}