rustc_builtin_macros/
format_foreign.rs

1pub(crate) mod printf {
2    use rustc_span::InnerSpan;
3
4    use super::strcursor::StrCursor as Cur;
5
6    /// Represents a single `printf`-style substitution.
7    #[derive(Clone, PartialEq, Debug)]
8    pub(crate) enum Substitution<'a> {
9        /// A formatted output substitution with its internal byte offset.
10        Format(Format<'a>),
11        /// A literal `%%` escape, with its start and end indices.
12        Escape((usize, usize)),
13    }
14
15    impl<'a> Substitution<'a> {
16        pub(crate) fn as_str(&self) -> &str {
17            match self {
18                Substitution::Format(fmt) => fmt.span,
19                Substitution::Escape(_) => "%%",
20            }
21        }
22
23        pub(crate) fn position(&self) -> InnerSpan {
24            match self {
25                Substitution::Format(fmt) => fmt.position,
26                &Substitution::Escape((start, end)) => InnerSpan::new(start, end),
27            }
28        }
29
30        pub(crate) fn set_position(&mut self, start: usize, end: usize) {
31            match self {
32                Substitution::Format(fmt) => fmt.position = InnerSpan::new(start, end),
33                Substitution::Escape(pos) => *pos = (start, end),
34            }
35        }
36
37        /// Translate this substitution into an equivalent Rust formatting directive.
38        ///
39        /// This ignores cases where the substitution does not have an exact equivalent, or where
40        /// the substitution would be unnecessary.
41        pub(crate) fn translate(&self) -> Result<String, Option<String>> {
42            match self {
43                Substitution::Format(fmt) => fmt.translate(),
44                Substitution::Escape(_) => Err(None),
45            }
46        }
47    }
48
49    #[derive(Clone, PartialEq, Debug)]
50    /// A single `printf`-style formatting directive.
51    pub(crate) struct Format<'a> {
52        /// The entire original formatting directive.
53        span: &'a str,
54        /// The (1-based) parameter to be converted.
55        parameter: Option<u16>,
56        /// Formatting flags.
57        flags: &'a str,
58        /// Minimum width of the output.
59        width: Option<Num>,
60        /// Precision of the conversion.
61        precision: Option<Num>,
62        /// Length modifier for the conversion.
63        length: Option<&'a str>,
64        /// Type of parameter being converted.
65        type_: &'a str,
66        /// Byte offset for the start and end of this formatting directive.
67        position: InnerSpan,
68    }
69
70    impl Format<'_> {
71        /// Translate this directive into an equivalent Rust formatting directive.
72        ///
73        /// Returns `Err` in cases where the `printf` directive does not have an exact Rust
74        /// equivalent, rather than guessing.
75        pub(crate) fn translate(&self) -> Result<String, Option<String>> {
76            use std::fmt::Write;
77
78            let (c_alt, c_zero, c_left, c_plus) = {
79                let mut c_alt = false;
80                let mut c_zero = false;
81                let mut c_left = false;
82                let mut c_plus = false;
83                for c in self.flags.chars() {
84                    match c {
85                        '#' => c_alt = true,
86                        '0' => c_zero = true,
87                        '-' => c_left = true,
88                        '+' => c_plus = true,
89                        _ => {
90                            return Err(Some(format!("the flag `{c}` is unknown or unsupported")));
91                        }
92                    }
93                }
94                (c_alt, c_zero, c_left, c_plus)
95            };
96
97            // Has a special form in Rust for numbers.
98            let fill = c_zero.then_some("0");
99
100            let align = c_left.then_some("<");
101
102            // Rust doesn't have an equivalent to the `' '` flag.
103            let sign = c_plus.then_some("+");
104
105            // Not *quite* the same, depending on the type...
106            let alt = c_alt;
107
108            let width = match self.width {
109                Some(Num::Next) => {
110                    // NOTE: Rust doesn't support this.
111                    return Err(Some(
112                        "you have to use a positional or named parameter for the width".to_string(),
113                    ));
114                }
115                w @ Some(Num::Arg(_)) => w,
116                w @ Some(Num::Num(_)) => w,
117                None => None,
118            };
119
120            let precision = self.precision;
121
122            // NOTE: although length *can* have an effect, we can't duplicate the effect in Rust, so
123            // we just ignore it.
124
125            let (type_, use_zero_fill, is_int) = match self.type_ {
126                "d" | "i" | "u" => (None, true, true),
127                "f" | "F" => (None, false, false),
128                "s" | "c" => (None, false, false),
129                "e" | "E" => (Some(self.type_), true, false),
130                "x" | "X" | "o" => (Some(self.type_), true, true),
131                "p" => (Some(self.type_), false, true),
132                "g" => (Some("e"), true, false),
133                "G" => (Some("E"), true, false),
134                _ => {
135                    return Err(Some(format!(
136                        "the conversion specifier `{}` is unknown or unsupported",
137                        self.type_
138                    )));
139                }
140            };
141
142            let (fill, width, precision) = match (is_int, width, precision) {
143                (true, Some(_), Some(_)) => {
144                    // Rust can't duplicate this insanity.
145                    return Err(Some(
146                        "width and precision cannot both be specified for integer conversions"
147                            .to_string(),
148                    ));
149                }
150                (true, None, Some(p)) => (Some("0"), Some(p), None),
151                (true, w, None) => (fill, w, None),
152                (false, w, p) => (fill, w, p),
153            };
154
155            let align = match (self.type_, width.is_some(), align.is_some()) {
156                ("s", true, false) => Some(">"),
157                _ => align,
158            };
159
160            let (fill, zero_fill) = match (fill, use_zero_fill) {
161                (Some("0"), true) => (None, true),
162                (fill, _) => (fill, false),
163            };
164
165            let alt = match type_ {
166                Some("x" | "X") => alt,
167                _ => false,
168            };
169
170            let has_options = fill.is_some()
171                || align.is_some()
172                || sign.is_some()
173                || alt
174                || zero_fill
175                || width.is_some()
176                || precision.is_some()
177                || type_.is_some();
178
179            // Initialise with a rough guess.
180            let cap = self.span.len() + if has_options { 2 } else { 0 };
181            let mut s = String::with_capacity(cap);
182
183            s.push('{');
184
185            if let Some(arg) = self.parameter {
186                match write!(
187                    s,
188                    "{}",
189                    match arg.checked_sub(1) {
190                        Some(a) => a,
191                        None => return Err(None),
192                    }
193                ) {
194                    Err(_) => return Err(None),
195                    _ => {}
196                }
197            }
198
199            if has_options {
200                s.push(':');
201
202                let align = if let Some(fill) = fill {
203                    s.push_str(fill);
204                    align.or(Some(">"))
205                } else {
206                    align
207                };
208
209                if let Some(align) = align {
210                    s.push_str(align);
211                }
212
213                if let Some(sign) = sign {
214                    s.push_str(sign);
215                }
216
217                if alt {
218                    s.push('#');
219                }
220
221                if zero_fill {
222                    s.push('0');
223                }
224
225                if let Some(width) = width {
226                    match width.translate(&mut s) {
227                        Err(_) => return Err(None),
228                        _ => {}
229                    }
230                }
231
232                if let Some(precision) = precision {
233                    s.push('.');
234                    match precision.translate(&mut s) {
235                        Err(_) => return Err(None),
236                        _ => {}
237                    }
238                }
239
240                if let Some(type_) = type_ {
241                    s.push_str(type_);
242                }
243            }
244
245            s.push('}');
246            Ok(s)
247        }
248    }
249
250    /// A general number used in a `printf` formatting directive.
251    #[derive(Copy, Clone, PartialEq, Debug)]
252    enum Num {
253        // The range of these values is technically bounded by `NL_ARGMAX`... but, at least for GNU
254        // libc, it apparently has no real fixed limit. A `u16` is used here on the basis that it
255        // is *vanishingly* unlikely that *anyone* is going to try formatting something wider, or
256        // with more precision, than 32 thousand positions which is so wide it couldn't possibly fit
257        // on a screen.
258        /// A specific, fixed value.
259        Num(u16),
260        /// The value is derived from a positional argument.
261        Arg(u16),
262        /// The value is derived from the "next" unconverted argument.
263        Next,
264    }
265
266    impl Num {
267        fn from_str(s: &str, arg: Option<&str>) -> Option<Self> {
268            if let Some(arg) = arg {
269                arg.parse().ok().map(|arg| Num::Arg(arg))
270            } else if s == "*" {
271                Some(Num::Next)
272            } else {
273                s.parse().ok().map(|num| Num::Num(num))
274            }
275        }
276
277        fn translate(&self, s: &mut String) -> std::fmt::Result {
278            use std::fmt::Write;
279            match *self {
280                Num::Num(n) => write!(s, "{n}"),
281                Num::Arg(n) => {
282                    let n = n.checked_sub(1).ok_or(std::fmt::Error)?;
283                    write!(s, "{n}$")
284                }
285                Num::Next => write!(s, "*"),
286            }
287        }
288    }
289
290    /// Returns an iterator over all substitutions in a given string.
291    pub(crate) fn iter_subs(s: &str, start_pos: usize) -> Substitutions<'_> {
292        Substitutions { s, pos: start_pos }
293    }
294
295    /// Iterator over substitutions in a string.
296    pub(crate) struct Substitutions<'a> {
297        s: &'a str,
298        pos: usize,
299    }
300
301    impl<'a> Iterator for Substitutions<'a> {
302        type Item = Substitution<'a>;
303        fn next(&mut self) -> Option<Self::Item> {
304            let (mut sub, tail) = parse_next_substitution(self.s)?;
305            self.s = tail;
306            let InnerSpan { start, end } = sub.position();
307            sub.set_position(start + self.pos, end + self.pos);
308            self.pos += end;
309            Some(sub)
310        }
311
312        fn size_hint(&self) -> (usize, Option<usize>) {
313            // Substitutions are at least 2 characters long.
314            (0, Some(self.s.len() / 2))
315        }
316    }
317
318    enum State {
319        Start,
320        Flags,
321        Width,
322        WidthArg,
323        Prec,
324        PrecInner,
325        Length,
326        Type,
327    }
328
329    /// Parse the next substitution from the input string.
330    fn parse_next_substitution(s: &str) -> Option<(Substitution<'_>, &str)> {
331        use self::State::*;
332
333        let at = {
334            let start = s.find('%')?;
335            if let '%' = s[start + 1..].chars().next()? {
336                return Some((Substitution::Escape((start, start + 2)), &s[start + 2..]));
337            }
338
339            Cur::new_at(s, start)
340        };
341
342        // This is meant to be a translation of the following regex:
343        //
344        // ```regex
345        // (?x)
346        // ^ %
347        // (?: (?P<parameter> \d+) \$ )?
348        // (?P<flags> [-+ 0\#']* )
349        // (?P<width> \d+ | \* (?: (?P<widtha> \d+) \$ )? )?
350        // (?: \. (?P<precision> \d+ | \* (?: (?P<precisiona> \d+) \$ )? ) )?
351        // (?P<length>
352        //     # Standard
353        //     hh | h | ll | l | L | z | j | t
354        //
355        //     # Other
356        //     | I32 | I64 | I | q
357        // )?
358        // (?P<type> . )
359        // ```
360
361        // Used to establish the full span at the end.
362        let start = at;
363        // The current position within the string.
364        let mut at = at.at_next_cp()?;
365        // `c` is the next codepoint, `next` is a cursor after it.
366        let (mut c, mut next) = at.next_cp()?;
367
368        // Update `at`, `c`, and `next`, exiting if we're out of input.
369        macro_rules! move_to {
370            ($cur:expr) => {{
371                at = $cur;
372                let (c_, next_) = at.next_cp()?;
373                c = c_;
374                next = next_;
375            }};
376        }
377
378        // Constructs a result when parsing fails.
379        //
380        // Note: `move` used to capture copies of the cursors as they are *now*.
381        let fallback = move || {
382            Some((
383                Substitution::Format(Format {
384                    span: start.slice_between(next).unwrap(),
385                    parameter: None,
386                    flags: "",
387                    width: None,
388                    precision: None,
389                    length: None,
390                    type_: at.slice_between(next).unwrap(),
391                    position: InnerSpan::new(start.at, next.at),
392                }),
393                next.slice_after(),
394            ))
395        };
396
397        // Next parsing state.
398        let mut state = Start;
399
400        // Sadly, Rust isn't *quite* smart enough to know these *must* be initialised by the end.
401        let mut parameter: Option<u16> = None;
402        let mut flags: &str = "";
403        let mut width: Option<Num> = None;
404        let mut precision: Option<Num> = None;
405        let mut length: Option<&str> = None;
406        let mut type_: &str = "";
407        let end: Cur<'_>;
408
409        if let Start = state {
410            match c {
411                '1'..='9' => {
412                    let end = at_next_cp_while(next, char::is_ascii_digit);
413                    match end.next_cp() {
414                        // Yes, this *is* the parameter.
415                        Some(('$', end2)) => {
416                            state = Flags;
417                            parameter = Some(at.slice_between(end).unwrap().parse().unwrap());
418                            move_to!(end2);
419                        }
420                        // Wait, no, actually, it's the width.
421                        Some(_) => {
422                            state = Prec;
423                            parameter = None;
424                            flags = "";
425                            width = at.slice_between(end).and_then(|num| Num::from_str(num, None));
426                            if width.is_none() {
427                                return fallback();
428                            }
429                            move_to!(end);
430                        }
431                        // It's invalid, is what it is.
432                        None => return fallback(),
433                    }
434                }
435                _ => {
436                    state = Flags;
437                    parameter = None;
438                    move_to!(at);
439                }
440            }
441        }
442
443        if let Flags = state {
444            let end = at_next_cp_while(at, is_flag);
445            state = Width;
446            flags = at.slice_between(end).unwrap();
447            move_to!(end);
448        }
449
450        if let Width = state {
451            match c {
452                '*' => {
453                    state = WidthArg;
454                    move_to!(next);
455                }
456                '1'..='9' => {
457                    let end = at_next_cp_while(next, char::is_ascii_digit);
458                    state = Prec;
459                    width = at.slice_between(end).and_then(|num| Num::from_str(num, None));
460                    if width.is_none() {
461                        return fallback();
462                    }
463                    move_to!(end);
464                }
465                _ => {
466                    state = Prec;
467                    width = None;
468                    move_to!(at);
469                }
470            }
471        }
472
473        if let WidthArg = state {
474            let end = at_next_cp_while(at, char::is_ascii_digit);
475            match end.next_cp() {
476                Some(('$', end2)) => {
477                    state = Prec;
478                    width = Num::from_str("", at.slice_between(end));
479                    move_to!(end2);
480                }
481                _ => {
482                    state = Prec;
483                    width = Some(Num::Next);
484                    move_to!(end);
485                }
486            }
487        }
488
489        if let Prec = state {
490            match c {
491                '.' => {
492                    state = PrecInner;
493                    move_to!(next);
494                }
495                _ => {
496                    state = Length;
497                    precision = None;
498                    move_to!(at);
499                }
500            }
501        }
502
503        if let PrecInner = state {
504            match c {
505                '*' => {
506                    let end = at_next_cp_while(next, char::is_ascii_digit);
507                    match end.next_cp() {
508                        Some(('$', end2)) => {
509                            state = Length;
510                            precision = Num::from_str("*", next.slice_between(end));
511                            move_to!(end2);
512                        }
513                        _ => {
514                            state = Length;
515                            precision = Some(Num::Next);
516                            move_to!(end);
517                        }
518                    }
519                }
520                '0'..='9' => {
521                    let end = at_next_cp_while(next, char::is_ascii_digit);
522                    state = Length;
523                    precision = at.slice_between(end).and_then(|num| Num::from_str(num, None));
524                    move_to!(end);
525                }
526                _ => return fallback(),
527            }
528        }
529
530        if let Length = state {
531            let c1_next1 = next.next_cp();
532            match (c, c1_next1) {
533                ('h', Some(('h', next1))) | ('l', Some(('l', next1))) => {
534                    state = Type;
535                    length = Some(at.slice_between(next1).unwrap());
536                    move_to!(next1);
537                }
538
539                ('h' | 'l' | 'L' | 'z' | 'j' | 't' | 'q', _) => {
540                    state = Type;
541                    length = Some(at.slice_between(next).unwrap());
542                    move_to!(next);
543                }
544
545                ('I', _) => {
546                    let end = next
547                        .at_next_cp()
548                        .and_then(|end| end.at_next_cp())
549                        .map(|end| (next.slice_between(end).unwrap(), end));
550                    let end = match end {
551                        Some(("32" | "64", end)) => end,
552                        _ => next,
553                    };
554                    state = Type;
555                    length = Some(at.slice_between(end).unwrap());
556                    move_to!(end);
557                }
558
559                _ => {
560                    state = Type;
561                    length = None;
562                    move_to!(at);
563                }
564            }
565        }
566
567        if let Type = state {
568            type_ = at.slice_between(next).unwrap();
569
570            // Don't use `move_to!` here, as we *can* be at the end of the input.
571            at = next;
572        }
573
574        let _ = c; // to avoid never used value
575
576        end = at;
577        let position = InnerSpan::new(start.at, end.at);
578
579        let f = Format {
580            span: start.slice_between(end).unwrap(),
581            parameter,
582            flags,
583            width,
584            precision,
585            length,
586            type_,
587            position,
588        };
589        Some((Substitution::Format(f), end.slice_after()))
590    }
591
592    fn at_next_cp_while<F>(mut cur: Cur<'_>, mut pred: F) -> Cur<'_>
593    where
594        F: FnMut(&char) -> bool,
595    {
596        loop {
597            match cur.next_cp() {
598                Some((c, next)) => {
599                    if pred(&c) {
600                        cur = next;
601                    } else {
602                        return cur;
603                    }
604                }
605                None => return cur,
606            }
607        }
608    }
609
610    fn is_flag(c: &char) -> bool {
611        matches!(c, '0' | '-' | '+' | ' ' | '#' | '\'')
612    }
613
614    #[cfg(test)]
615    mod tests;
616}
617
618pub(crate) mod shell {
619    use rustc_span::InnerSpan;
620
621    use super::strcursor::StrCursor as Cur;
622
623    #[derive(Clone, PartialEq, Debug)]
624    pub(crate) enum Substitution<'a> {
625        Ordinal(u8, (usize, usize)),
626        Name(&'a str, (usize, usize)),
627        Escape((usize, usize)),
628    }
629
630    impl Substitution<'_> {
631        pub(crate) fn as_str(&self) -> String {
632            match self {
633                Substitution::Ordinal(n, _) => format!("${n}"),
634                Substitution::Name(n, _) => format!("${n}"),
635                Substitution::Escape(_) => "$$".into(),
636            }
637        }
638
639        pub(crate) fn position(&self) -> InnerSpan {
640            let (Self::Ordinal(_, pos) | Self::Name(_, pos) | Self::Escape(pos)) = self;
641            InnerSpan::new(pos.0, pos.1)
642        }
643
644        fn set_position(&mut self, start: usize, end: usize) {
645            let (Self::Ordinal(_, pos) | Self::Name(_, pos) | Self::Escape(pos)) = self;
646            *pos = (start, end);
647        }
648
649        pub(crate) fn translate(&self) -> Result<String, Option<String>> {
650            match self {
651                Substitution::Ordinal(n, _) => Ok(format!("{{{}}}", n)),
652                Substitution::Name(n, _) => Ok(format!("{{{}}}", n)),
653                Substitution::Escape(_) => Err(None),
654            }
655        }
656    }
657
658    /// Returns an iterator over all substitutions in a given string.
659    pub(crate) fn iter_subs(s: &str, start_pos: usize) -> Substitutions<'_> {
660        Substitutions { s, pos: start_pos }
661    }
662
663    /// Iterator over substitutions in a string.
664    pub(crate) struct Substitutions<'a> {
665        s: &'a str,
666        pos: usize,
667    }
668
669    impl<'a> Iterator for Substitutions<'a> {
670        type Item = Substitution<'a>;
671        fn next(&mut self) -> Option<Self::Item> {
672            let (mut sub, tail) = parse_next_substitution(self.s)?;
673            self.s = tail;
674            let InnerSpan { start, end } = sub.position();
675            sub.set_position(start + self.pos, end + self.pos);
676            self.pos += end;
677            Some(sub)
678        }
679
680        fn size_hint(&self) -> (usize, Option<usize>) {
681            (0, Some(self.s.len()))
682        }
683    }
684
685    /// Parse the next substitution from the input string.
686    fn parse_next_substitution(s: &str) -> Option<(Substitution<'_>, &str)> {
687        let at = {
688            let start = s.find('$')?;
689            match s[start + 1..].chars().next()? {
690                '$' => return Some((Substitution::Escape((start, start + 2)), &s[start + 2..])),
691                c @ '0'..='9' => {
692                    let n = (c as u8) - b'0';
693                    return Some((Substitution::Ordinal(n, (start, start + 2)), &s[start + 2..]));
694                }
695                _ => { /* fall-through */ }
696            }
697
698            Cur::new_at(s, start)
699        };
700
701        let at = at.at_next_cp()?;
702        let (c, inner) = at.next_cp()?;
703
704        if !is_ident_head(c) {
705            None
706        } else {
707            let end = at_next_cp_while(inner, is_ident_tail);
708            let slice = at.slice_between(end).unwrap();
709            let start = at.at - 1;
710            let end_pos = at.at + slice.len();
711            Some((Substitution::Name(slice, (start, end_pos)), end.slice_after()))
712        }
713    }
714
715    fn at_next_cp_while<F>(mut cur: Cur<'_>, mut pred: F) -> Cur<'_>
716    where
717        F: FnMut(char) -> bool,
718    {
719        loop {
720            match cur.next_cp() {
721                Some((c, next)) => {
722                    if pred(c) {
723                        cur = next;
724                    } else {
725                        return cur;
726                    }
727                }
728                None => return cur,
729            }
730        }
731    }
732
733    fn is_ident_head(c: char) -> bool {
734        c.is_ascii_alphabetic() || c == '_'
735    }
736
737    fn is_ident_tail(c: char) -> bool {
738        c.is_ascii_alphanumeric() || c == '_'
739    }
740
741    #[cfg(test)]
742    mod tests;
743}
744
mod strcursor {
    /// A cheap, copyable position inside a string slice.
    ///
    /// `at` is a byte offset into the underlying string.
    #[derive(Clone, Copy)]
    pub(crate) struct StrCursor<'a> {
        s: &'a str,
        pub at: usize,
    }

    impl<'a> StrCursor<'a> {
        /// Creates a cursor over `s` positioned at byte offset `at`.
        pub(crate) fn new_at(s: &'a str, at: usize) -> StrCursor<'a> {
            StrCursor { s, at }
        }

        /// Returns a cursor moved one code point to the right, or `None` at the end of the
        /// string.
        pub(crate) fn at_next_cp(mut self) -> Option<StrCursor<'a>> {
            if self.try_seek_right_cp() { Some(self) } else { None }
        }

        /// Returns the code point under the cursor together with a cursor placed just after
        /// it, or `None` at the end of the string.
        pub(crate) fn next_cp(mut self) -> Option<(char, StrCursor<'a>)> {
            let cp = self.cp_after()?;
            self.at += cp.len_utf8();
            Some((cp, self))
        }

        /// Everything before the cursor.
        fn slice_before(&self) -> &'a str {
            &self.s[..self.at]
        }

        /// Everything from the cursor to the end of the string.
        pub(crate) fn slice_after(&self) -> &'a str {
            &self.s[self.at..]
        }

        /// Returns the text between this cursor and `until`, in whichever order they occur.
        ///
        /// Yields `None` when the two cursors do not refer to the very same string.
        pub(crate) fn slice_between(&self, until: StrCursor<'a>) -> Option<&'a str> {
            if !str_eq_literal(self.s, until.s) {
                return None;
            }
            let (beg, end) =
                if self.at <= until.at { (self.at, until.at) } else { (until.at, self.at) };
            Some(&self.s[beg..end])
        }

        /// The code point immediately after the cursor, if any.
        fn cp_after(&self) -> Option<char> {
            self.slice_after().chars().next()
        }

        /// Moves the cursor one code point to the right; returns `false` (leaving the cursor
        /// untouched) at the end of the string.
        fn try_seek_right_cp(&mut self) -> bool {
            match self.cp_after() {
                Some(cp) => {
                    self.at += cp.len_utf8();
                    true
                }
                None => false,
            }
        }
    }

    impl std::fmt::Debug for StrCursor<'_> {
        fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            let (before, after) = (self.slice_before(), self.slice_after());
            write!(fmt, "StrCursor({:?} | {:?})", before, after)
        }
    }

    /// Returns `true` if `a` and `b` are the same slice: same starting address and same
    /// length, regardless of contents.
    fn str_eq_literal(a: &str, b: &str) -> bool {
        (a.as_ptr(), a.len()) == (b.as_ptr(), b.len())
    }
}