rustc_builtin_macros/
format_foreign.rs

1pub(crate) mod printf {
2    use rustc_span::InnerSpan;
3
4    use super::strcursor::StrCursor as Cur;
5
    /// Represents a single `printf`-style substitution found in a format string.
    ///
    /// Both variants record where the substitution sits; see [`Substitution::position`].
    #[derive(Clone, PartialEq, Debug)]
    pub(crate) enum Substitution<'a> {
        /// A formatted output substitution (e.g. `%d`); the parsed directive carries its own
        /// span in its `position` field.
        Format(Format<'a>),
        /// A literal `%%` escape, with its start and end indices.
        Escape((usize, usize)),
    }
14
15    impl ToString for Substitution<'_> {
16        fn to_string(&self) -> String {
17            match self {
18                Substitution::Format(fmt) => fmt.span.into(),
19                Substitution::Escape(_) => "%%".into(),
20            }
21        }
22    }
23
24    impl Substitution<'_> {
25        pub(crate) fn position(&self) -> InnerSpan {
26            match self {
27                Substitution::Format(fmt) => fmt.position,
28                &Substitution::Escape((start, end)) => InnerSpan::new(start, end),
29            }
30        }
31
32        pub(crate) fn set_position(&mut self, start: usize, end: usize) {
33            match self {
34                Substitution::Format(fmt) => fmt.position = InnerSpan::new(start, end),
35                Substitution::Escape(pos) => *pos = (start, end),
36            }
37        }
38
39        /// Translate this substitution into an equivalent Rust formatting directive.
40        ///
41        /// This ignores cases where the substitution does not have an exact equivalent, or where
42        /// the substitution would be unnecessary.
43        pub(crate) fn translate(&self) -> Result<String, Option<String>> {
44            match self {
45                Substitution::Format(fmt) => fmt.translate(),
46                Substitution::Escape(_) => Err(None),
47            }
48        }
49    }
50
    /// A single `printf`-style formatting directive, split into its syntactic components.
    ///
    /// All string fields borrow from the format string that was parsed.
    #[derive(Clone, PartialEq, Debug)]
    pub(crate) struct Format<'a> {
        /// The entire original formatting directive.
        span: &'a str,
        /// The (1-based) parameter to be converted.
        parameter: Option<u16>,
        /// Formatting flags, kept as the raw run of flag characters.
        flags: &'a str,
        /// Minimum width of the output.
        width: Option<Num>,
        /// Precision of the conversion.
        precision: Option<Num>,
        /// Length modifier for the conversion.
        length: Option<&'a str>,
        /// Type of parameter being converted.
        type_: &'a str,
        /// Byte offset for the start and end of this formatting directive.
        position: InnerSpan,
    }
71
72    impl Format<'_> {
73        /// Translate this directive into an equivalent Rust formatting directive.
74        ///
75        /// Returns `Err` in cases where the `printf` directive does not have an exact Rust
76        /// equivalent, rather than guessing.
77        pub(crate) fn translate(&self) -> Result<String, Option<String>> {
78            use std::fmt::Write;
79
80            let (c_alt, c_zero, c_left, c_plus) = {
81                let mut c_alt = false;
82                let mut c_zero = false;
83                let mut c_left = false;
84                let mut c_plus = false;
85                for c in self.flags.chars() {
86                    match c {
87                        '#' => c_alt = true,
88                        '0' => c_zero = true,
89                        '-' => c_left = true,
90                        '+' => c_plus = true,
91                        _ => {
92                            return Err(Some(format!("the flag `{c}` is unknown or unsupported")));
93                        }
94                    }
95                }
96                (c_alt, c_zero, c_left, c_plus)
97            };
98
99            // Has a special form in Rust for numbers.
100            let fill = c_zero.then_some("0");
101
102            let align = c_left.then_some("<");
103
104            // Rust doesn't have an equivalent to the `' '` flag.
105            let sign = c_plus.then_some("+");
106
107            // Not *quite* the same, depending on the type...
108            let alt = c_alt;
109
110            let width = match self.width {
111                Some(Num::Next) => {
112                    // NOTE: Rust doesn't support this.
113                    return Err(Some(
114                        "you have to use a positional or named parameter for the width".to_string(),
115                    ));
116                }
117                w @ Some(Num::Arg(_)) => w,
118                w @ Some(Num::Num(_)) => w,
119                None => None,
120            };
121
122            let precision = self.precision;
123
124            // NOTE: although length *can* have an effect, we can't duplicate the effect in Rust, so
125            // we just ignore it.
126
127            let (type_, use_zero_fill, is_int) = match self.type_ {
128                "d" | "i" | "u" => (None, true, true),
129                "f" | "F" => (None, false, false),
130                "s" | "c" => (None, false, false),
131                "e" | "E" => (Some(self.type_), true, false),
132                "x" | "X" | "o" => (Some(self.type_), true, true),
133                "p" => (Some(self.type_), false, true),
134                "g" => (Some("e"), true, false),
135                "G" => (Some("E"), true, false),
136                _ => {
137                    return Err(Some(format!(
138                        "the conversion specifier `{}` is unknown or unsupported",
139                        self.type_
140                    )));
141                }
142            };
143
144            let (fill, width, precision) = match (is_int, width, precision) {
145                (true, Some(_), Some(_)) => {
146                    // Rust can't duplicate this insanity.
147                    return Err(Some(
148                        "width and precision cannot both be specified for integer conversions"
149                            .to_string(),
150                    ));
151                }
152                (true, None, Some(p)) => (Some("0"), Some(p), None),
153                (true, w, None) => (fill, w, None),
154                (false, w, p) => (fill, w, p),
155            };
156
157            let align = match (self.type_, width.is_some(), align.is_some()) {
158                ("s", true, false) => Some(">"),
159                _ => align,
160            };
161
162            let (fill, zero_fill) = match (fill, use_zero_fill) {
163                (Some("0"), true) => (None, true),
164                (fill, _) => (fill, false),
165            };
166
167            let alt = match type_ {
168                Some("x" | "X") => alt,
169                _ => false,
170            };
171
172            let has_options = fill.is_some()
173                || align.is_some()
174                || sign.is_some()
175                || alt
176                || zero_fill
177                || width.is_some()
178                || precision.is_some()
179                || type_.is_some();
180
181            // Initialise with a rough guess.
182            let cap = self.span.len() + if has_options { 2 } else { 0 };
183            let mut s = String::with_capacity(cap);
184
185            s.push('{');
186
187            if let Some(arg) = self.parameter {
188                match write!(
189                    s,
190                    "{}",
191                    match arg.checked_sub(1) {
192                        Some(a) => a,
193                        None => return Err(None),
194                    }
195                ) {
196                    Err(_) => return Err(None),
197                    _ => {}
198                }
199            }
200
201            if has_options {
202                s.push(':');
203
204                let align = if let Some(fill) = fill {
205                    s.push_str(fill);
206                    align.or(Some(">"))
207                } else {
208                    align
209                };
210
211                if let Some(align) = align {
212                    s.push_str(align);
213                }
214
215                if let Some(sign) = sign {
216                    s.push_str(sign);
217                }
218
219                if alt {
220                    s.push('#');
221                }
222
223                if zero_fill {
224                    s.push('0');
225                }
226
227                if let Some(width) = width {
228                    match width.translate(&mut s) {
229                        Err(_) => return Err(None),
230                        _ => {}
231                    }
232                }
233
234                if let Some(precision) = precision {
235                    s.push('.');
236                    match precision.translate(&mut s) {
237                        Err(_) => return Err(None),
238                        _ => {}
239                    }
240                }
241
242                if let Some(type_) = type_ {
243                    s.push_str(type_);
244                }
245            }
246
247            s.push('}');
248            Ok(s)
249        }
250    }
251
252    /// A general number used in a `printf` formatting directive.
253    #[derive(Copy, Clone, PartialEq, Debug)]
254    enum Num {
255        // The range of these values is technically bounded by `NL_ARGMAX`... but, at least for GNU
256        // libc, it apparently has no real fixed limit. A `u16` is used here on the basis that it
257        // is *vanishingly* unlikely that *anyone* is going to try formatting something wider, or
258        // with more precision, than 32 thousand positions which is so wide it couldn't possibly fit
259        // on a screen.
260        /// A specific, fixed value.
261        Num(u16),
262        /// The value is derived from a positional argument.
263        Arg(u16),
264        /// The value is derived from the "next" unconverted argument.
265        Next,
266    }
267
268    impl Num {
269        fn from_str(s: &str, arg: Option<&str>) -> Option<Self> {
270            if let Some(arg) = arg {
271                arg.parse().ok().map(|arg| Num::Arg(arg))
272            } else if s == "*" {
273                Some(Num::Next)
274            } else {
275                s.parse().ok().map(|num| Num::Num(num))
276            }
277        }
278
279        fn translate(&self, s: &mut String) -> std::fmt::Result {
280            use std::fmt::Write;
281            match *self {
282                Num::Num(n) => write!(s, "{n}"),
283                Num::Arg(n) => {
284                    let n = n.checked_sub(1).ok_or(std::fmt::Error)?;
285                    write!(s, "{n}$")
286                }
287                Num::Next => write!(s, "*"),
288            }
289        }
290    }
291
292    /// Returns an iterator over all substitutions in a given string.
293    pub(crate) fn iter_subs(s: &str, start_pos: usize) -> Substitutions<'_> {
294        Substitutions { s, pos: start_pos }
295    }
296
    /// Iterator over substitutions in a string.
    pub(crate) struct Substitutions<'a> {
        /// The part of the input that has not been parsed yet.
        s: &'a str,
        /// Offset added to the positions of yielded substitutions; advanced past each
        /// substitution as it is returned.
        pos: usize,
    }
302
303    impl<'a> Iterator for Substitutions<'a> {
304        type Item = Substitution<'a>;
305        fn next(&mut self) -> Option<Self::Item> {
306            let (mut sub, tail) = parse_next_substitution(self.s)?;
307            self.s = tail;
308            let InnerSpan { start, end } = sub.position();
309            sub.set_position(start + self.pos, end + self.pos);
310            self.pos += end;
311            Some(sub)
312        }
313
314        fn size_hint(&self) -> (usize, Option<usize>) {
315            // Substitutions are at least 2 characters long.
316            (0, Some(self.s.len() / 2))
317        }
318    }
319
    /// The stages of the hand-written `printf` directive parser in `parse_next_substitution`.
    enum State {
        /// Right after the `%`: leading digits may be a parameter index or a width.
        Start,
        /// Reading the run of flag characters.
        Flags,
        /// Reading the width: a literal number or `*`.
        Width,
        /// After a `*` width: an optional `N$` positional argument may follow.
        WidthArg,
        /// Looking for the `.` that introduces a precision.
        Prec,
        /// After the `.`: reading the precision value itself.
        PrecInner,
        /// Reading the optional length modifier.
        Length,
        /// Reading the conversion specifier.
        Type,
    }
330
331    /// Parse the next substitution from the input string.
332    fn parse_next_substitution(s: &str) -> Option<(Substitution<'_>, &str)> {
333        use self::State::*;
334
335        let at = {
336            let start = s.find('%')?;
337            if let '%' = s[start + 1..].chars().next()? {
338                return Some((Substitution::Escape((start, start + 2)), &s[start + 2..]));
339            }
340
341            Cur::new_at(s, start)
342        };
343
344        // This is meant to be a translation of the following regex:
345        //
346        // ```regex
347        // (?x)
348        // ^ %
349        // (?: (?P<parameter> \d+) \$ )?
350        // (?P<flags> [-+ 0\#']* )
351        // (?P<width> \d+ | \* (?: (?P<widtha> \d+) \$ )? )?
352        // (?: \. (?P<precision> \d+ | \* (?: (?P<precisiona> \d+) \$ )? ) )?
353        // (?P<length>
354        //     # Standard
355        //     hh | h | ll | l | L | z | j | t
356        //
357        //     # Other
358        //     | I32 | I64 | I | q
359        // )?
360        // (?P<type> . )
361        // ```
362
363        // Used to establish the full span at the end.
364        let start = at;
365        // The current position within the string.
366        let mut at = at.at_next_cp()?;
367        // `c` is the next codepoint, `next` is a cursor after it.
368        let (mut c, mut next) = at.next_cp()?;
369
370        // Update `at`, `c`, and `next`, exiting if we're out of input.
371        macro_rules! move_to {
372            ($cur:expr) => {{
373                at = $cur;
374                let (c_, next_) = at.next_cp()?;
375                c = c_;
376                next = next_;
377            }};
378        }
379
380        // Constructs a result when parsing fails.
381        //
382        // Note: `move` used to capture copies of the cursors as they are *now*.
383        let fallback = move || {
384            Some((
385                Substitution::Format(Format {
386                    span: start.slice_between(next).unwrap(),
387                    parameter: None,
388                    flags: "",
389                    width: None,
390                    precision: None,
391                    length: None,
392                    type_: at.slice_between(next).unwrap(),
393                    position: InnerSpan::new(start.at, next.at),
394                }),
395                next.slice_after(),
396            ))
397        };
398
399        // Next parsing state.
400        let mut state = Start;
401
402        // Sadly, Rust isn't *quite* smart enough to know these *must* be initialised by the end.
403        let mut parameter: Option<u16> = None;
404        let mut flags: &str = "";
405        let mut width: Option<Num> = None;
406        let mut precision: Option<Num> = None;
407        let mut length: Option<&str> = None;
408        let mut type_: &str = "";
409        let end: Cur<'_>;
410
411        if let Start = state {
412            match c {
413                '1'..='9' => {
414                    let end = at_next_cp_while(next, char::is_ascii_digit);
415                    match end.next_cp() {
416                        // Yes, this *is* the parameter.
417                        Some(('$', end2)) => {
418                            state = Flags;
419                            parameter = Some(at.slice_between(end).unwrap().parse().unwrap());
420                            move_to!(end2);
421                        }
422                        // Wait, no, actually, it's the width.
423                        Some(_) => {
424                            state = Prec;
425                            parameter = None;
426                            flags = "";
427                            width = at.slice_between(end).and_then(|num| Num::from_str(num, None));
428                            if width.is_none() {
429                                return fallback();
430                            }
431                            move_to!(end);
432                        }
433                        // It's invalid, is what it is.
434                        None => return fallback(),
435                    }
436                }
437                _ => {
438                    state = Flags;
439                    parameter = None;
440                    move_to!(at);
441                }
442            }
443        }
444
445        if let Flags = state {
446            let end = at_next_cp_while(at, is_flag);
447            state = Width;
448            flags = at.slice_between(end).unwrap();
449            move_to!(end);
450        }
451
452        if let Width = state {
453            match c {
454                '*' => {
455                    state = WidthArg;
456                    move_to!(next);
457                }
458                '1'..='9' => {
459                    let end = at_next_cp_while(next, char::is_ascii_digit);
460                    state = Prec;
461                    width = at.slice_between(end).and_then(|num| Num::from_str(num, None));
462                    if width.is_none() {
463                        return fallback();
464                    }
465                    move_to!(end);
466                }
467                _ => {
468                    state = Prec;
469                    width = None;
470                    move_to!(at);
471                }
472            }
473        }
474
475        if let WidthArg = state {
476            let end = at_next_cp_while(at, char::is_ascii_digit);
477            match end.next_cp() {
478                Some(('$', end2)) => {
479                    state = Prec;
480                    width = Num::from_str("", at.slice_between(end));
481                    move_to!(end2);
482                }
483                _ => {
484                    state = Prec;
485                    width = Some(Num::Next);
486                    move_to!(end);
487                }
488            }
489        }
490
491        if let Prec = state {
492            match c {
493                '.' => {
494                    state = PrecInner;
495                    move_to!(next);
496                }
497                _ => {
498                    state = Length;
499                    precision = None;
500                    move_to!(at);
501                }
502            }
503        }
504
505        if let PrecInner = state {
506            match c {
507                '*' => {
508                    let end = at_next_cp_while(next, char::is_ascii_digit);
509                    match end.next_cp() {
510                        Some(('$', end2)) => {
511                            state = Length;
512                            precision = Num::from_str("*", next.slice_between(end));
513                            move_to!(end2);
514                        }
515                        _ => {
516                            state = Length;
517                            precision = Some(Num::Next);
518                            move_to!(end);
519                        }
520                    }
521                }
522                '0'..='9' => {
523                    let end = at_next_cp_while(next, char::is_ascii_digit);
524                    state = Length;
525                    precision = at.slice_between(end).and_then(|num| Num::from_str(num, None));
526                    move_to!(end);
527                }
528                _ => return fallback(),
529            }
530        }
531
532        if let Length = state {
533            let c1_next1 = next.next_cp();
534            match (c, c1_next1) {
535                ('h', Some(('h', next1))) | ('l', Some(('l', next1))) => {
536                    state = Type;
537                    length = Some(at.slice_between(next1).unwrap());
538                    move_to!(next1);
539                }
540
541                ('h' | 'l' | 'L' | 'z' | 'j' | 't' | 'q', _) => {
542                    state = Type;
543                    length = Some(at.slice_between(next).unwrap());
544                    move_to!(next);
545                }
546
547                ('I', _) => {
548                    let end = next
549                        .at_next_cp()
550                        .and_then(|end| end.at_next_cp())
551                        .map(|end| (next.slice_between(end).unwrap(), end));
552                    let end = match end {
553                        Some(("32" | "64", end)) => end,
554                        _ => next,
555                    };
556                    state = Type;
557                    length = Some(at.slice_between(end).unwrap());
558                    move_to!(end);
559                }
560
561                _ => {
562                    state = Type;
563                    length = None;
564                    move_to!(at);
565                }
566            }
567        }
568
569        if let Type = state {
570            type_ = at.slice_between(next).unwrap();
571
572            // Don't use `move_to!` here, as we *can* be at the end of the input.
573            at = next;
574        }
575
576        let _ = c; // to avoid never used value
577
578        end = at;
579        let position = InnerSpan::new(start.at, end.at);
580
581        let f = Format {
582            span: start.slice_between(end).unwrap(),
583            parameter,
584            flags,
585            width,
586            precision,
587            length,
588            type_,
589            position,
590        };
591        Some((Substitution::Format(f), end.slice_after()))
592    }
593
594    fn at_next_cp_while<F>(mut cur: Cur<'_>, mut pred: F) -> Cur<'_>
595    where
596        F: FnMut(&char) -> bool,
597    {
598        loop {
599            match cur.next_cp() {
600                Some((c, next)) => {
601                    if pred(&c) {
602                        cur = next;
603                    } else {
604                        return cur;
605                    }
606                }
607                None => return cur,
608            }
609        }
610    }
611
612    fn is_flag(c: &char) -> bool {
613        matches!(c, '0' | '-' | '+' | ' ' | '#' | '\'')
614    }
615
616    #[cfg(test)]
617    mod tests;
618}
619
620pub(crate) mod shell {
621    use rustc_span::InnerSpan;
622
623    use super::strcursor::StrCursor as Cur;
624
    /// Represents a single shell-style (`$`-prefixed) substitution.
    ///
    /// Every variant carries a `(start, end)` pair of byte offsets locating it in the input.
    #[derive(Clone, PartialEq, Debug)]
    pub(crate) enum Substitution<'a> {
        /// A single-digit positional parameter such as `$1`.
        Ordinal(u8, (usize, usize)),
        /// A named variable such as `$name`; the name excludes the leading `$`.
        Name(&'a str, (usize, usize)),
        /// A literal `$$` escape.
        Escape((usize, usize)),
    }
631
632    impl ToString for Substitution<'_> {
633        fn to_string(&self) -> String {
634            match self {
635                Substitution::Ordinal(n, _) => format!("${n}"),
636                Substitution::Name(n, _) => format!("${n}"),
637                Substitution::Escape(_) => "$$".into(),
638            }
639        }
640    }
641
642    impl Substitution<'_> {
643        pub(crate) fn position(&self) -> InnerSpan {
644            let (Self::Ordinal(_, pos) | Self::Name(_, pos) | Self::Escape(pos)) = self;
645            InnerSpan::new(pos.0, pos.1)
646        }
647
648        fn set_position(&mut self, start: usize, end: usize) {
649            let (Self::Ordinal(_, pos) | Self::Name(_, pos) | Self::Escape(pos)) = self;
650            *pos = (start, end);
651        }
652
653        pub(crate) fn translate(&self) -> Result<String, Option<String>> {
654            match self {
655                Substitution::Ordinal(n, _) => Ok(format!("{{{}}}", n)),
656                Substitution::Name(n, _) => Ok(format!("{{{}}}", n)),
657                Substitution::Escape(_) => Err(None),
658            }
659        }
660    }
661
662    /// Returns an iterator over all substitutions in a given string.
663    pub(crate) fn iter_subs(s: &str, start_pos: usize) -> Substitutions<'_> {
664        Substitutions { s, pos: start_pos }
665    }
666
    /// Iterator over substitutions in a string.
    pub(crate) struct Substitutions<'a> {
        /// The part of the input that has not been parsed yet.
        s: &'a str,
        /// Offset added to the positions of yielded substitutions; advanced past each
        /// substitution as it is returned.
        pos: usize,
    }
672
673    impl<'a> Iterator for Substitutions<'a> {
674        type Item = Substitution<'a>;
675        fn next(&mut self) -> Option<Self::Item> {
676            let (mut sub, tail) = parse_next_substitution(self.s)?;
677            self.s = tail;
678            let InnerSpan { start, end } = sub.position();
679            sub.set_position(start + self.pos, end + self.pos);
680            self.pos += end;
681            Some(sub)
682        }
683
684        fn size_hint(&self) -> (usize, Option<usize>) {
685            (0, Some(self.s.len()))
686        }
687    }
688
689    /// Parse the next substitution from the input string.
690    fn parse_next_substitution(s: &str) -> Option<(Substitution<'_>, &str)> {
691        let at = {
692            let start = s.find('$')?;
693            match s[start + 1..].chars().next()? {
694                '$' => return Some((Substitution::Escape((start, start + 2)), &s[start + 2..])),
695                c @ '0'..='9' => {
696                    let n = (c as u8) - b'0';
697                    return Some((Substitution::Ordinal(n, (start, start + 2)), &s[start + 2..]));
698                }
699                _ => { /* fall-through */ }
700            }
701
702            Cur::new_at(s, start)
703        };
704
705        let at = at.at_next_cp()?;
706        let (c, inner) = at.next_cp()?;
707
708        if !is_ident_head(c) {
709            None
710        } else {
711            let end = at_next_cp_while(inner, is_ident_tail);
712            let slice = at.slice_between(end).unwrap();
713            let start = at.at - 1;
714            let end_pos = at.at + slice.len();
715            Some((Substitution::Name(slice, (start, end_pos)), end.slice_after()))
716        }
717    }
718
719    fn at_next_cp_while<F>(mut cur: Cur<'_>, mut pred: F) -> Cur<'_>
720    where
721        F: FnMut(char) -> bool,
722    {
723        loop {
724            match cur.next_cp() {
725                Some((c, next)) => {
726                    if pred(c) {
727                        cur = next;
728                    } else {
729                        return cur;
730                    }
731                }
732                None => return cur,
733            }
734        }
735    }
736
737    fn is_ident_head(c: char) -> bool {
738        c.is_ascii_alphabetic() || c == '_'
739    }
740
741    fn is_ident_tail(c: char) -> bool {
742        c.is_ascii_alphanumeric() || c == '_'
743    }
744
745    #[cfg(test)]
746    mod tests;
747}
748
mod strcursor {
    /// A byte position inside a string slice that can be stepped forward one code point at a
    /// time. The cursor is a cheap `Copy` value, so stepping methods take it by value and
    /// return the advanced copy.
    #[derive(Clone, Copy)]
    pub(crate) struct StrCursor<'a> {
        s: &'a str,
        pub at: usize,
    }

    impl<'a> StrCursor<'a> {
        /// Creates a cursor over `s` positioned at byte offset `at`.
        pub(crate) fn new_at(s: &'a str, at: usize) -> StrCursor<'a> {
            StrCursor { s, at }
        }

        /// Returns the cursor advanced by one code point, or `None` at the end of the string.
        pub(crate) fn at_next_cp(mut self) -> Option<StrCursor<'a>> {
            if self.try_seek_right_cp() { Some(self) } else { None }
        }

        /// Returns the code point under the cursor together with the cursor advanced past it,
        /// or `None` at the end of the string.
        pub(crate) fn next_cp(mut self) -> Option<(char, StrCursor<'a>)> {
            let cp = self.cp_after()?;
            self.seek_right(cp.len_utf8());
            Some((cp, self))
        }

        /// Everything in the string before the cursor.
        fn slice_before(&self) -> &'a str {
            &self.s[..self.at]
        }

        /// Everything in the string from the cursor onwards.
        pub(crate) fn slice_after(&self) -> &'a str {
            &self.s[self.at..]
        }

        /// Returns the text between this cursor and `until`, in either order.
        ///
        /// Yields `None` when the two cursors do not refer to the very same string slice.
        pub(crate) fn slice_between(&self, until: StrCursor<'a>) -> Option<&'a str> {
            if !str_eq_literal(self.s, until.s) {
                return None;
            }
            let (beg, end) =
                if self.at <= until.at { (self.at, until.at) } else { (until.at, self.at) };
            Some(&self.s[beg..end])
        }

        /// The code point under the cursor, if any.
        fn cp_after(&self) -> Option<char> {
            self.slice_after().chars().next()
        }

        /// Steps over one code point in place; returns whether there was one to step over.
        fn try_seek_right_cp(&mut self) -> bool {
            match self.cp_after() {
                Some(cp) => {
                    self.seek_right(cp.len_utf8());
                    true
                }
                None => false,
            }
        }

        /// Moves the cursor forward by `bytes` bytes without any validation.
        fn seek_right(&mut self, bytes: usize) {
            self.at += bytes;
        }
    }

    impl std::fmt::Debug for StrCursor<'_> {
        fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            let (before, after) = (self.slice_before(), self.slice_after());
            write!(fmt, "StrCursor({:?} | {:?})", before, after)
        }
    }

    /// Identity comparison: `true` only when both slices start at the same address and have
    /// the same length (their contents are not compared).
    fn str_eq_literal(a: &str, b: &str) -> bool {
        a.len() == b.len() && a.as_bytes().as_ptr() == b.as_bytes().as_ptr()
    }
}