// rustc_parse_format/lib.rs

//! Macro support for format strings
//!
//! These structures are used when parsing format strings for the compiler.
//! Parsing does not happen at runtime: structures of `std::fmt::rt` are
//! generated instead.
6
7// tidy-alphabetical-start
8// We want to be able to build this crate with a stable compiler,
9// so no `#![feature]` attributes should be added.
10#![deny(unstable_features)]
11#![doc(
12    html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/",
13    html_playground_url = "https://play.rust-lang.org/",
14    test(attr(deny(warnings)))
15)]
16// tidy-alphabetical-end
17
18pub use Alignment::*;
19pub use Count::*;
20pub use Position::*;
21use rustc_lexer::unescape;
22
// Note: copied from rustc_span
/// A `start..end` range located inside a `Span`. Diagnostics use it when only
/// positions relative to that span are available.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct InnerSpan {
    /// Relative offset at which the range begins.
    pub start: usize,
    /// Relative offset at which the range ends.
    pub end: usize,
}

impl InnerSpan {
    /// Builds an `InnerSpan` from its two endpoints, stored as given.
    pub fn new(start: usize, end: usize) -> InnerSpan {
        Self { start, end }
    }
}
36
/// The location and before/after width of a character whose width has changed from its source
/// code representation.
///
/// `Debug` is derived so that mappings can be printed while debugging or in test failures.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct InnerWidthMapping {
    /// Index of the character in the source
    pub position: usize,
    /// The inner width in characters
    pub before: usize,
    /// The transformed width in characters
    pub after: usize,
}

impl InnerWidthMapping {
    /// Creates a mapping from its three components, stored as given.
    pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping {
        InnerWidthMapping { position, before, after }
    }
}
54
55/// Whether the input string is a literal. If yes, it contains the inner width mappings.
56#[derive(Clone, PartialEq, Eq)]
57enum InputStringKind {
58    NotALiteral,
59    Literal { width_mappings: Vec<InnerWidthMapping> },
60}
61
/// Selects which flavour of format string is being parsed.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParseMode {
    /// An ordinary `format_args!`-style format string.
    Format,
    /// A template string for inline assembly (`asm!`).
    InlineAsm,
}
70
71#[derive(Copy, Clone)]
72struct InnerOffset(usize);
73
74impl InnerOffset {
75    fn to(self, end: InnerOffset) -> InnerSpan {
76        InnerSpan::new(self.0, end.0)
77    }
78}
79
80/// A piece is a portion of the format string which represents the next part
81/// to emit. These are emitted as a stream by the `Parser` class.
82#[derive(Clone, Debug, PartialEq)]
83pub enum Piece<'a> {
84    /// A literal string which should directly be emitted
85    Lit(&'a str),
86    /// This describes that formatting should process the next argument (as
87    /// specified inside) for emission.
88    NextArgument(Box<Argument<'a>>),
89}
90
91/// Representation of an argument specification.
92#[derive(Copy, Clone, Debug, PartialEq)]
93pub struct Argument<'a> {
94    /// Where to find this argument
95    pub position: Position<'a>,
96    /// The span of the position indicator. Includes any whitespace in implicit
97    /// positions (`{  }`).
98    pub position_span: InnerSpan,
99    /// How to format the argument
100    pub format: FormatSpec<'a>,
101}
102
103/// Specification for the formatting of an argument in the format string.
104#[derive(Copy, Clone, Debug, PartialEq)]
105pub struct FormatSpec<'a> {
106    /// Optionally specified character to fill alignment with.
107    pub fill: Option<char>,
108    /// Span of the optionally specified fill character.
109    pub fill_span: Option<InnerSpan>,
110    /// Optionally specified alignment.
111    pub align: Alignment,
112    /// The `+` or `-` flag.
113    pub sign: Option<Sign>,
114    /// The `#` flag.
115    pub alternate: bool,
116    /// The `0` flag.
117    pub zero_pad: bool,
118    /// The `x` or `X` flag. (Only for `Debug`.)
119    pub debug_hex: Option<DebugHex>,
120    /// The integer precision to use.
121    pub precision: Count<'a>,
122    /// The span of the precision formatting flag (for diagnostics).
123    pub precision_span: Option<InnerSpan>,
124    /// The string width requested for the resulting format.
125    pub width: Count<'a>,
126    /// The span of the width formatting flag (for diagnostics).
127    pub width_span: Option<InnerSpan>,
128    /// The descriptor string representing the name of the format desired for
129    /// this argument, this can be empty or any number of characters, although
130    /// it is required to be one word.
131    pub ty: &'a str,
132    /// The span of the descriptor string (for diagnostics).
133    pub ty_span: Option<InnerSpan>,
134}
135
/// Says where the argument of a placeholder is to be found.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum Position<'a> {
    /// No position was written; the argument index is implied.
    ArgumentImplicitlyIs(usize),
    /// The format string names an explicit argument index.
    ArgumentIs(usize),
    /// The argument is referred to by name.
    ArgumentNamed(&'a str),
}

impl Position<'_> {
    /// Returns the argument index for the two index-based variants, and `None`
    /// for a named argument.
    pub fn index(&self) -> Option<usize> {
        match *self {
            Self::ArgumentImplicitlyIs(idx) | Self::ArgumentIs(idx) => Some(idx),
            Self::ArgumentNamed(_) => None,
        }
    }
}
155
/// The alignments a format spec can request.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum Alignment {
    /// Align the value to the left.
    AlignLeft,
    /// Align the value to the right.
    AlignRight,
    /// Center the value.
    AlignCenter,
    /// No alignment was requested; a default alignment applies.
    AlignUnknown,
}
168
/// The sign flags a format spec can carry.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum Sign {
    /// The `+` flag.
    Plus,
    /// The `-` flag.
    Minus,
}
177
/// The hexadecimal flags that may accompany `?` (debug) formatting.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum DebugHex {
    /// The `x` flag in `{:x?}`.
    Lower,
    /// The `X` flag in `{:X?}`.
    Upper,
}
186
187/// A count is used for the precision and width parameters of an integer, and
188/// can reference either an argument or a literal integer.
189#[derive(Copy, Clone, Debug, PartialEq)]
190pub enum Count<'a> {
191    /// The count is specified explicitly.
192    CountIs(u16),
193    /// The count is specified by the argument with the given name.
194    CountIsName(&'a str, InnerSpan),
195    /// The count is specified by the argument at the given index.
196    CountIsParam(usize),
197    /// The count is specified by a star (like in `{:.*}`) that refers to the argument at the given index.
198    CountIsStar(usize),
199    /// The count is implied and cannot be explicitly specified.
200    CountImplied,
201}
202
203pub struct ParseError {
204    pub description: String,
205    pub note: Option<String>,
206    pub label: String,
207    pub span: InnerSpan,
208    pub secondary_label: Option<(String, InnerSpan)>,
209    pub suggestion: Suggestion,
210}
211
212pub enum Suggestion {
213    None,
214    /// Replace inline argument with positional argument:
215    /// `format!("{foo.bar}")` -> `format!("{}", foo.bar)`
216    UsePositional,
217    /// Remove `r#` from identifier:
218    /// `format!("{r#foo}")` -> `format!("{foo}")`
219    RemoveRawIdent(InnerSpan),
220    /// Reorder format parameter:
221    /// `format!("{foo:?#}")` -> `format!("{foo:#?}")`
222    /// `format!("{foo:?x}")` -> `format!("{foo:x?}")`
223    /// `format!("{foo:?X}")` -> `format!("{foo:X?}")`
224    ReorderFormatParameter(InnerSpan, String),
225}
226
227/// The parser structure for interpreting the input format string. This is
228/// modeled as an iterator over `Piece` structures to form a stream of tokens
229/// being output.
230///
231/// This is a recursive-descent parser for the sake of simplicity, and if
232/// necessary there's probably lots of room for improvement performance-wise.
233pub struct Parser<'a> {
234    mode: ParseMode,
235    input: &'a str,
236    cur: std::iter::Peekable<std::str::CharIndices<'a>>,
237    /// Error messages accumulated during parsing
238    pub errors: Vec<ParseError>,
239    /// Current position of implicit positional argument pointer
240    pub curarg: usize,
241    /// `Some(raw count)` when the string is "raw", used to position spans correctly
242    style: Option<usize>,
243    /// Start and end byte offset of every successfully parsed argument
244    pub arg_places: Vec<InnerSpan>,
245    /// Characters whose length has been changed from their in-code representation
246    width_map: Vec<InnerWidthMapping>,
247    /// Span of the last opening brace seen, used for error reporting
248    last_opening_brace: Option<InnerSpan>,
249    /// Whether the source string is comes from `println!` as opposed to `format!` or `print!`
250    append_newline: bool,
251    /// Whether this formatting string was written directly in the source. This controls whether we
252    /// can use spans to refer into it and give better error messages.
253    /// N.B: This does _not_ control whether implicit argument captures can be used.
254    pub is_source_literal: bool,
255    /// Start position of the current line.
256    cur_line_start: usize,
257    /// Start and end byte offset of every line of the format string. Excludes
258    /// newline characters and leading whitespace.
259    pub line_spans: Vec<InnerSpan>,
260}
261
262impl<'a> Iterator for Parser<'a> {
263    type Item = Piece<'a>;
264
265    fn next(&mut self) -> Option<Piece<'a>> {
266        if let Some(&(pos, c)) = self.cur.peek() {
267            match c {
268                '{' => {
269                    let curr_last_brace = self.last_opening_brace;
270                    let byte_pos = self.to_span_index(pos);
271                    let lbrace_end = InnerOffset(byte_pos.0 + self.to_span_width(pos));
272                    self.last_opening_brace = Some(byte_pos.to(lbrace_end));
273                    self.cur.next();
274                    if self.consume('{') {
275                        self.last_opening_brace = curr_last_brace;
276
277                        Some(Piece::Lit(self.string(pos + 1)))
278                    } else {
279                        let arg = self.argument(lbrace_end);
280                        if let Some(rbrace_pos) = self.consume_closing_brace(&arg) {
281                            if self.is_source_literal {
282                                let lbrace_byte_pos = self.to_span_index(pos);
283                                let rbrace_byte_pos = self.to_span_index(rbrace_pos);
284
285                                let width = self.to_span_width(rbrace_pos);
286
287                                self.arg_places.push(
288                                    lbrace_byte_pos.to(InnerOffset(rbrace_byte_pos.0 + width)),
289                                );
290                            }
291                        } else if let Some(&(_, maybe)) = self.cur.peek() {
292                            match maybe {
293                                '?' => self.suggest_format_debug(),
294                                '<' | '^' | '>' => self.suggest_format_align(maybe),
295                                _ => self.suggest_positional_arg_instead_of_captured_arg(arg),
296                            }
297                        }
298                        Some(Piece::NextArgument(Box::new(arg)))
299                    }
300                }
301                '}' => {
302                    self.cur.next();
303                    if self.consume('}') {
304                        Some(Piece::Lit(self.string(pos + 1)))
305                    } else {
306                        let err_pos = self.to_span_index(pos);
307                        self.err_with_note(
308                            "unmatched `}` found",
309                            "unmatched `}`",
310                            "if you intended to print `}`, you can escape it using `}}`",
311                            err_pos.to(err_pos),
312                        );
313                        None
314                    }
315                }
316                _ => Some(Piece::Lit(self.string(pos))),
317            }
318        } else {
319            if self.is_source_literal {
320                let span = self.span(self.cur_line_start, self.input.len());
321                if self.line_spans.last() != Some(&span) {
322                    self.line_spans.push(span);
323                }
324            }
325            None
326        }
327    }
328}
329
330impl<'a> Parser<'a> {
331    /// Creates a new parser for the given format string
332    pub fn new(
333        s: &'a str,
334        style: Option<usize>,
335        snippet: Option<String>,
336        append_newline: bool,
337        mode: ParseMode,
338    ) -> Parser<'a> {
339        let input_string_kind = find_width_map_from_snippet(s, snippet, style);
340        let (width_map, is_source_literal) = match input_string_kind {
341            InputStringKind::Literal { width_mappings } => (width_mappings, true),
342            InputStringKind::NotALiteral => (Vec::new(), false),
343        };
344
345        Parser {
346            mode,
347            input: s,
348            cur: s.char_indices().peekable(),
349            errors: vec![],
350            curarg: 0,
351            style,
352            arg_places: vec![],
353            width_map,
354            last_opening_brace: None,
355            append_newline,
356            is_source_literal,
357            cur_line_start: 0,
358            line_spans: vec![],
359        }
360    }
361
362    /// Notifies of an error. The message doesn't actually need to be of type
363    /// String, but I think it does when this eventually uses conditions so it
364    /// might as well start using it now.
365    fn err(&mut self, description: impl Into<String>, label: impl Into<String>, span: InnerSpan) {
366        self.errors.push(ParseError {
367            description: description.into(),
368            note: None,
369            label: label.into(),
370            span,
371            secondary_label: None,
372            suggestion: Suggestion::None,
373        });
374    }
375
376    /// Notifies of an error. The message doesn't actually need to be of type
377    /// String, but I think it does when this eventually uses conditions so it
378    /// might as well start using it now.
379    fn err_with_note(
380        &mut self,
381        description: impl Into<String>,
382        label: impl Into<String>,
383        note: impl Into<String>,
384        span: InnerSpan,
385    ) {
386        self.errors.push(ParseError {
387            description: description.into(),
388            note: Some(note.into()),
389            label: label.into(),
390            span,
391            secondary_label: None,
392            suggestion: Suggestion::None,
393        });
394    }
395
396    /// Optionally consumes the specified character. If the character is not at
397    /// the current position, then the current iterator isn't moved and `false` is
398    /// returned, otherwise the character is consumed and `true` is returned.
399    fn consume(&mut self, c: char) -> bool {
400        self.consume_pos(c).is_some()
401    }
402
403    /// Optionally consumes the specified character. If the character is not at
404    /// the current position, then the current iterator isn't moved and `None` is
405    /// returned, otherwise the character is consumed and the current position is
406    /// returned.
407    fn consume_pos(&mut self, c: char) -> Option<usize> {
408        if let Some(&(pos, maybe)) = self.cur.peek() {
409            if c == maybe {
410                self.cur.next();
411                return Some(pos);
412            }
413        }
414        None
415    }
416
417    fn remap_pos(&self, mut pos: usize) -> InnerOffset {
418        for width in &self.width_map {
419            if pos > width.position {
420                pos += width.before - width.after;
421            } else if pos == width.position && width.after == 0 {
422                pos += width.before;
423            } else {
424                break;
425            }
426        }
427
428        InnerOffset(pos)
429    }
430
431    fn to_span_index(&self, pos: usize) -> InnerOffset {
432        // This handles the raw string case, the raw argument is the number of #
433        // in r###"..."### (we need to add one because of the `r`).
434        let raw = self.style.map_or(0, |raw| raw + 1);
435        let pos = self.remap_pos(pos);
436        InnerOffset(raw + pos.0 + 1)
437    }
438
439    fn to_span_width(&self, pos: usize) -> usize {
440        let pos = self.remap_pos(pos);
441        match self.width_map.iter().find(|w| w.position == pos.0) {
442            Some(w) => w.before,
443            None => 1,
444        }
445    }
446
447    fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan {
448        let start = self.to_span_index(start_pos);
449        let end = self.to_span_index(end_pos);
450        start.to(end)
451    }
452
453    /// Forces consumption of the specified character. If the character is not
454    /// found, an error is emitted.
455    fn consume_closing_brace(&mut self, arg: &Argument<'_>) -> Option<usize> {
456        self.ws();
457
458        let pos;
459        let description;
460
461        if let Some(&(peek_pos, maybe)) = self.cur.peek() {
462            if maybe == '}' {
463                self.cur.next();
464                return Some(peek_pos);
465            }
466
467            pos = peek_pos;
468            description = format!("expected `}}`, found `{}`", maybe.escape_debug());
469        } else {
470            description = "expected `}` but string was terminated".to_owned();
471            // point at closing `"`
472            pos = self.input.len() - if self.append_newline { 1 } else { 0 };
473        }
474
475        let pos = self.to_span_index(pos);
476
477        let label = "expected `}`".to_owned();
478        let (note, secondary_label) = if arg.format.fill == Some('}') {
479            (
480                Some("the character `}` is interpreted as a fill character because of the `:` that precedes it".to_owned()),
481                arg.format.fill_span.map(|sp| ("this is not interpreted as a formatting closing brace".to_owned(), sp)),
482            )
483        } else {
484            (
485                Some("if you intended to print `{`, you can escape it using `{{`".to_owned()),
486                self.last_opening_brace.map(|sp| ("because of this opening brace".to_owned(), sp)),
487            )
488        };
489
490        self.errors.push(ParseError {
491            description,
492            note,
493            label,
494            span: pos.to(pos),
495            secondary_label,
496            suggestion: Suggestion::None,
497        });
498
499        None
500    }
501
502    /// Consumes all whitespace characters until the first non-whitespace character
503    fn ws(&mut self) {
504        while let Some(_) = self.cur.next_if(|&(_, c)| c.is_whitespace()) {}
505    }
506
507    /// Parses all of a string which is to be considered a "raw literal" in a
508    /// format string. This is everything outside of the braces.
509    fn string(&mut self, start: usize) -> &'a str {
510        // we may not consume the character, peek the iterator
511        while let Some(&(pos, c)) = self.cur.peek() {
512            match c {
513                '{' | '}' => {
514                    return &self.input[start..pos];
515                }
516                '\n' if self.is_source_literal => {
517                    self.line_spans.push(self.span(self.cur_line_start, pos));
518                    self.cur_line_start = pos + 1;
519                    self.cur.next();
520                }
521                _ => {
522                    if self.is_source_literal && pos == self.cur_line_start && c.is_whitespace() {
523                        self.cur_line_start = pos + c.len_utf8();
524                    }
525                    self.cur.next();
526                }
527            }
528        }
529        &self.input[start..]
530    }
531
532    /// Parses an `Argument` structure, or what's contained within braces inside the format string.
533    fn argument(&mut self, start: InnerOffset) -> Argument<'a> {
534        let pos = self.position();
535
536        let end = self
537            .cur
538            .clone()
539            .find(|(_, ch)| !ch.is_whitespace())
540            .map_or(start, |(end, _)| self.to_span_index(end));
541        let position_span = start.to(end);
542
543        let format = match self.mode {
544            ParseMode::Format => self.format(),
545            ParseMode::InlineAsm => self.inline_asm(),
546        };
547
548        // Resolve position after parsing format spec.
549        let pos = match pos {
550            Some(position) => position,
551            None => {
552                let i = self.curarg;
553                self.curarg += 1;
554                ArgumentImplicitlyIs(i)
555            }
556        };
557
558        Argument { position: pos, position_span, format }
559    }
560
561    /// Parses a positional argument for a format. This could either be an
562    /// integer index of an argument, a named argument, or a blank string.
563    /// Returns `Some(parsed_position)` if the position is not implicitly
564    /// consuming a macro argument, `None` if it's the case.
565    fn position(&mut self) -> Option<Position<'a>> {
566        if let Some(i) = self.integer() {
567            Some(ArgumentIs(i.into()))
568        } else {
569            match self.cur.peek() {
570                Some(&(lo, c)) if rustc_lexer::is_id_start(c) => {
571                    let word = self.word();
572
573                    // Recover from `r#ident` in format strings.
574                    // FIXME: use a let chain
575                    if word == "r" {
576                        if let Some((pos, '#')) = self.cur.peek() {
577                            if self.input[pos + 1..]
578                                .chars()
579                                .next()
580                                .is_some_and(rustc_lexer::is_id_start)
581                            {
582                                self.cur.next();
583                                let word = self.word();
584                                let prefix_span = self.span(lo, lo + 2);
585                                let full_span = self.span(lo, lo + 2 + word.len());
586                                self.errors.insert(0, ParseError {
587                                    description: "raw identifiers are not supported".to_owned(),
588                                    note: Some("identifiers in format strings can be keywords and don't need to be prefixed with `r#`".to_string()),
589                                    label: "raw identifier used here".to_owned(),
590                                    span: full_span,
591                                    secondary_label: None,
592                                    suggestion: Suggestion::RemoveRawIdent(prefix_span),
593                                });
594                                return Some(ArgumentNamed(word));
595                            }
596                        }
597                    }
598
599                    Some(ArgumentNamed(word))
600                }
601
602                // This is an `ArgumentNext`.
603                // Record the fact and do the resolution after parsing the
604                // format spec, to make things like `{:.*}` work.
605                _ => None,
606            }
607        }
608    }
609
610    fn current_pos(&mut self) -> usize {
611        if let Some(&(pos, _)) = self.cur.peek() { pos } else { self.input.len() }
612    }
613
614    /// Parses a format specifier at the current position, returning all of the
615    /// relevant information in the `FormatSpec` struct.
616    fn format(&mut self) -> FormatSpec<'a> {
617        let mut spec = FormatSpec {
618            fill: None,
619            fill_span: None,
620            align: AlignUnknown,
621            sign: None,
622            alternate: false,
623            zero_pad: false,
624            debug_hex: None,
625            precision: CountImplied,
626            precision_span: None,
627            width: CountImplied,
628            width_span: None,
629            ty: &self.input[..0],
630            ty_span: None,
631        };
632        if !self.consume(':') {
633            return spec;
634        }
635
636        // fill character
637        if let Some(&(idx, c)) = self.cur.peek() {
638            if let Some((_, '>' | '<' | '^')) = self.cur.clone().nth(1) {
639                spec.fill = Some(c);
640                spec.fill_span = Some(self.span(idx, idx + 1));
641                self.cur.next();
642            }
643        }
644        // Alignment
645        if self.consume('<') {
646            spec.align = AlignLeft;
647        } else if self.consume('>') {
648            spec.align = AlignRight;
649        } else if self.consume('^') {
650            spec.align = AlignCenter;
651        }
652        // Sign flags
653        if self.consume('+') {
654            spec.sign = Some(Sign::Plus);
655        } else if self.consume('-') {
656            spec.sign = Some(Sign::Minus);
657        }
658        // Alternate marker
659        if self.consume('#') {
660            spec.alternate = true;
661        }
662        // Width and precision
663        let mut havewidth = false;
664
665        if self.consume('0') {
666            // small ambiguity with '0$' as a format string. In theory this is a
667            // '0' flag and then an ill-formatted format string with just a '$'
668            // and no count, but this is better if we instead interpret this as
669            // no '0' flag and '0$' as the width instead.
670            if let Some(end) = self.consume_pos('$') {
671                spec.width = CountIsParam(0);
672                spec.width_span = Some(self.span(end - 1, end + 1));
673                havewidth = true;
674            } else {
675                spec.zero_pad = true;
676            }
677        }
678
679        if !havewidth {
680            let start = self.current_pos();
681            spec.width = self.count(start);
682            if spec.width != CountImplied {
683                let end = self.current_pos();
684                spec.width_span = Some(self.span(start, end));
685            }
686        }
687
688        if let Some(start) = self.consume_pos('.') {
689            if self.consume('*') {
690                // Resolve `CountIsNextParam`.
691                // We can do this immediately as `position` is resolved later.
692                let i = self.curarg;
693                self.curarg += 1;
694                spec.precision = CountIsStar(i);
695            } else {
696                spec.precision = self.count(start + 1);
697            }
698            let end = self.current_pos();
699            spec.precision_span = Some(self.span(start, end));
700        }
701
702        let ty_span_start = self.current_pos();
703        // Optional radix followed by the actual format specifier
704        if self.consume('x') {
705            if self.consume('?') {
706                spec.debug_hex = Some(DebugHex::Lower);
707                spec.ty = "?";
708            } else {
709                spec.ty = "x";
710            }
711        } else if self.consume('X') {
712            if self.consume('?') {
713                spec.debug_hex = Some(DebugHex::Upper);
714                spec.ty = "?";
715            } else {
716                spec.ty = "X";
717            }
718        } else if self.consume('?') {
719            spec.ty = "?";
720            if let Some(&(_, maybe)) = self.cur.peek() {
721                match maybe {
722                    '#' | 'x' | 'X' => self.suggest_format_parameter(maybe),
723                    _ => (),
724                }
725            }
726        } else {
727            spec.ty = self.word();
728            if !spec.ty.is_empty() {
729                let ty_span_end = self.current_pos();
730                spec.ty_span = Some(self.span(ty_span_start, ty_span_end));
731            }
732        }
733        spec
734    }
735
736    /// Parses an inline assembly template modifier at the current position, returning the modifier
737    /// in the `ty` field of the `FormatSpec` struct.
738    fn inline_asm(&mut self) -> FormatSpec<'a> {
739        let mut spec = FormatSpec {
740            fill: None,
741            fill_span: None,
742            align: AlignUnknown,
743            sign: None,
744            alternate: false,
745            zero_pad: false,
746            debug_hex: None,
747            precision: CountImplied,
748            precision_span: None,
749            width: CountImplied,
750            width_span: None,
751            ty: &self.input[..0],
752            ty_span: None,
753        };
754        if !self.consume(':') {
755            return spec;
756        }
757
758        let ty_span_start = self.current_pos();
759        spec.ty = self.word();
760        if !spec.ty.is_empty() {
761            let ty_span_end = self.current_pos();
762            spec.ty_span = Some(self.span(ty_span_start, ty_span_end));
763        }
764
765        spec
766    }
767
768    /// Parses a `Count` parameter at the current position. This does not check
769    /// for 'CountIsNextParam' because that is only used in precision, not
770    /// width.
771    fn count(&mut self, start: usize) -> Count<'a> {
772        if let Some(i) = self.integer() {
773            if self.consume('$') { CountIsParam(i.into()) } else { CountIs(i) }
774        } else {
775            let tmp = self.cur.clone();
776            let word = self.word();
777            if word.is_empty() {
778                self.cur = tmp;
779                CountImplied
780            } else if let Some(end) = self.consume_pos('$') {
781                let name_span = self.span(start, end);
782                CountIsName(word, name_span)
783            } else {
784                self.cur = tmp;
785                CountImplied
786            }
787        }
788    }
789
790    /// Parses a word starting at the current position. A word is the same as
791    /// Rust identifier, except that it can't start with `_` character.
792    fn word(&mut self) -> &'a str {
793        let start = match self.cur.peek() {
794            Some(&(pos, c)) if rustc_lexer::is_id_start(c) => {
795                self.cur.next();
796                pos
797            }
798            _ => {
799                return "";
800            }
801        };
802        let mut end = None;
803        while let Some(&(pos, c)) = self.cur.peek() {
804            if rustc_lexer::is_id_continue(c) {
805                self.cur.next();
806            } else {
807                end = Some(pos);
808                break;
809            }
810        }
811        let end = end.unwrap_or(self.input.len());
812        let word = &self.input[start..end];
813        if word == "_" {
814            self.err_with_note(
815                "invalid argument name `_`",
816                "invalid argument name",
817                "argument name cannot be a single underscore",
818                self.span(start, end),
819            );
820        }
821        word
822    }
823
824    fn integer(&mut self) -> Option<u16> {
825        let mut cur: u16 = 0;
826        let mut found = false;
827        let mut overflow = false;
828        let start = self.current_pos();
829        while let Some(&(_, c)) = self.cur.peek() {
830            if let Some(i) = c.to_digit(10) {
831                let (tmp, mul_overflow) = cur.overflowing_mul(10);
832                let (tmp, add_overflow) = tmp.overflowing_add(i as u16);
833                if mul_overflow || add_overflow {
834                    overflow = true;
835                }
836                cur = tmp;
837                found = true;
838                self.cur.next();
839            } else {
840                break;
841            }
842        }
843
844        if overflow {
845            let end = self.current_pos();
846            let overflowed_int = &self.input[start..end];
847            self.err(
848                format!(
849                    "integer `{}` does not fit into the type `u16` whose range is `0..={}`",
850                    overflowed_int,
851                    u16::MAX
852                ),
853                "integer out of range for `u16`",
854                self.span(start, end),
855            );
856        }
857
858        found.then_some(cur)
859    }
860
861    fn suggest_format_debug(&mut self) {
862        if let (Some(pos), Some(_)) = (self.consume_pos('?'), self.consume_pos(':')) {
863            let word = self.word();
864            let pos = self.to_span_index(pos);
865            self.errors.insert(
866                0,
867                ParseError {
868                    description: "expected format parameter to occur after `:`".to_owned(),
869                    note: Some(format!("`?` comes after `:`, try `{}:{}` instead", word, "?")),
870                    label: "expected `?` to occur after `:`".to_owned(),
871                    span: pos.to(pos),
872                    secondary_label: None,
873                    suggestion: Suggestion::None,
874                },
875            );
876        }
877    }
878
879    fn suggest_format_align(&mut self, alignment: char) {
880        if let Some(pos) = self.consume_pos(alignment) {
881            let pos = self.to_span_index(pos);
882            self.errors.insert(
883                0,
884                ParseError {
885                    description: "expected format parameter to occur after `:`".to_owned(),
886                    note: None,
887                    label: format!("expected `{}` to occur after `:`", alignment),
888                    span: pos.to(pos),
889                    secondary_label: None,
890                    suggestion: Suggestion::None,
891                },
892            );
893        }
894    }
895
896    fn suggest_positional_arg_instead_of_captured_arg(&mut self, arg: Argument<'a>) {
897        if let Some(end) = self.consume_pos('.') {
898            let byte_pos = self.to_span_index(end);
899            let start = InnerOffset(byte_pos.0 + 1);
900            let field = self.argument(start);
901            // We can only parse simple `foo.bar` field access or `foo.0` tuple index access, any
902            // deeper nesting, or another type of expression, like method calls, are not supported
903            if !self.consume('}') {
904                return;
905            }
906            if let ArgumentNamed(_) = arg.position {
907                match field.position {
908                    ArgumentNamed(_) => {
909                        self.errors.insert(
910                            0,
911                            ParseError {
912                                description: "field access isn't supported".to_string(),
913                                note: None,
914                                label: "not supported".to_string(),
915                                span: InnerSpan::new(
916                                    arg.position_span.start,
917                                    field.position_span.end,
918                                ),
919                                secondary_label: None,
920                                suggestion: Suggestion::UsePositional,
921                            },
922                        );
923                    }
924                    ArgumentIs(_) => {
925                        self.errors.insert(
926                            0,
927                            ParseError {
928                                description: "tuple index access isn't supported".to_string(),
929                                note: None,
930                                label: "not supported".to_string(),
931                                span: InnerSpan::new(
932                                    arg.position_span.start,
933                                    field.position_span.end,
934                                ),
935                                secondary_label: None,
936                                suggestion: Suggestion::UsePositional,
937                            },
938                        );
939                    }
940                    _ => {}
941                };
942            }
943        }
944    }
945
946    fn suggest_format_parameter(&mut self, c: char) {
947        let replacement = match c {
948            '#' => "#?",
949            'x' => "x?",
950            'X' => "X?",
951            _ => return,
952        };
953        let Some(pos) = self.consume_pos(c) else {
954            return;
955        };
956
957        let span = self.span(pos - 1, pos + 1);
958        let pos = self.to_span_index(pos);
959
960        self.errors.insert(
961            0,
962            ParseError {
963                description: format!("expected `}}`, found `{c}`"),
964                note: None,
965                label: "expected `'}'`".into(),
966                span: pos.to(pos),
967                secondary_label: None,
968                suggestion: Suggestion::ReorderFormatParameter(span, format!("{replacement}")),
969            },
970        )
971    }
972}
973
974/// Finds the indices of all characters that have been processed and differ between the actual
975/// written code (code snippet) and the `InternedString` that gets processed in the `Parser`
976/// in order to properly synthesise the intra-string `Span`s for error diagnostics.
977fn find_width_map_from_snippet(
978    input: &str,
979    snippet: Option<String>,
980    str_style: Option<usize>,
981) -> InputStringKind {
982    let snippet = match snippet {
983        Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s,
984        _ => return InputStringKind::NotALiteral,
985    };
986
987    if str_style.is_some() {
988        return InputStringKind::Literal { width_mappings: Vec::new() };
989    }
990
991    // Strip quotes.
992    let snippet = &snippet[1..snippet.len() - 1];
993
994    // Macros like `println` add a newline at the end. That technically doesn't make them "literals" anymore, but it's fine
995    // since we will never need to point our spans there, so we lie about it here by ignoring it.
996    // Since there might actually be newlines in the source code, we need to normalize away all trailing newlines.
997    // If we only trimmed it off the input, `format!("\n")` would cause a mismatch as here we they actually match up.
998    // Alternatively, we could just count the trailing newlines and only trim one from the input if they don't match up.
999    let input_no_nl = input.trim_end_matches('\n');
1000    let Some(unescaped) = unescape_string(snippet) else {
1001        return InputStringKind::NotALiteral;
1002    };
1003
1004    let unescaped_no_nl = unescaped.trim_end_matches('\n');
1005
1006    if unescaped_no_nl != input_no_nl {
1007        // The source string that we're pointing at isn't our input, so spans pointing at it will be incorrect.
1008        // This can for example happen with proc macros that respan generated literals.
1009        return InputStringKind::NotALiteral;
1010    }
1011
1012    let mut s = snippet.char_indices();
1013    let mut width_mappings = vec![];
1014    while let Some((pos, c)) = s.next() {
1015        match (c, s.clone().next()) {
1016            // skip whitespace and empty lines ending in '\\'
1017            ('\\', Some((_, '\n'))) => {
1018                let _ = s.next();
1019                let mut width = 2;
1020
1021                while let Some((_, c)) = s.clone().next() {
1022                    if matches!(c, ' ' | '\n' | '\t') {
1023                        width += 1;
1024                        let _ = s.next();
1025                    } else {
1026                        break;
1027                    }
1028                }
1029
1030                width_mappings.push(InnerWidthMapping::new(pos, width, 0));
1031            }
1032            ('\\', Some((_, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
1033                width_mappings.push(InnerWidthMapping::new(pos, 2, 1));
1034                let _ = s.next();
1035            }
1036            ('\\', Some((_, 'x'))) => {
1037                // consume `\xAB` literal
1038                s.nth(2);
1039                width_mappings.push(InnerWidthMapping::new(pos, 4, 1));
1040            }
1041            ('\\', Some((_, 'u'))) => {
1042                let mut width = 2;
1043                let _ = s.next();
1044
1045                if let Some((_, next_c)) = s.next() {
1046                    if next_c == '{' {
1047                        // consume up to 6 hexanumeric chars
1048                        let digits_len =
1049                            s.clone().take(6).take_while(|(_, c)| c.is_ascii_hexdigit()).count();
1050
1051                        let len_utf8 = s
1052                            .as_str()
1053                            .get(..digits_len)
1054                            .and_then(|digits| u32::from_str_radix(digits, 16).ok())
1055                            .and_then(char::from_u32)
1056                            .map_or(1, char::len_utf8);
1057
1058                        // Skip the digits, for chars that encode to more than 1 utf-8 byte
1059                        // exclude as many digits as it is greater than 1 byte
1060                        //
1061                        // So for a 3 byte character, exclude 2 digits
1062                        let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1));
1063
1064                        // skip '{' and '}' also
1065                        width += required_skips + 2;
1066
1067                        s.nth(digits_len);
1068                    } else if next_c.is_ascii_hexdigit() {
1069                        width += 1;
1070
1071                        // We suggest adding `{` and `}` when appropriate, accept it here as if
1072                        // it were correct
1073                        let mut i = 0; // consume up to 6 hexanumeric chars
1074                        while let (Some((_, c)), _) = (s.next(), i < 6) {
1075                            if c.is_ascii_hexdigit() {
1076                                width += 1;
1077                            } else {
1078                                break;
1079                            }
1080                            i += 1;
1081                        }
1082                    }
1083                }
1084
1085                width_mappings.push(InnerWidthMapping::new(pos, width, 1));
1086            }
1087            _ => {}
1088        }
1089    }
1090
1091    InputStringKind::Literal { width_mappings }
1092}
1093
1094fn unescape_string(string: &str) -> Option<String> {
1095    let mut buf = String::new();
1096    let mut ok = true;
1097    unescape::unescape_unicode(string, unescape::Mode::Str, &mut |_, unescaped_char| {
1098        match unescaped_char {
1099            Ok(c) => buf.push(c),
1100            Err(_) => ok = false,
1101        }
1102    });
1103
1104    ok.then_some(buf)
1105}
1106
1107// Assert a reasonable size for `Piece`
1108#[cfg(all(test, target_pointer_width = "64"))]
1109rustc_index::static_assert_size!(Piece<'_>, 16);
1110
1111#[cfg(test)]
1112mod tests;