core/num/dec2flt/
decimal.rs

1//! Arbitrary-precision decimal class for fallback algorithms.
2//!
3//! This is only used if the fast-path (native floats) and
4//! the Eisel-Lemire algorithm are unable to unambiguously
5//! determine the float.
6//!
7//! The technique used is "Simple Decimal Conversion", developed
8//! by Nigel Tao and Ken Thompson. A detailed description of the
9//! algorithm can be found in "ParseNumberF64 by Simple Decimal Conversion",
10//! available online: <https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html>.
11
12use crate::num::dec2flt::common::{ByteSlice, is_8digits};
13
14#[derive(Clone)]
15pub(super) struct Decimal {
16    /// The number of significant digits in the decimal.
17    pub num_digits: usize,
18    /// The offset of the decimal point in the significant digits.
19    pub decimal_point: i32,
20    /// If the number of significant digits stored in the decimal is truncated.
21    pub truncated: bool,
22    /// Buffer of the raw digits, in the range [0, 9].
23    pub digits: [u8; Self::MAX_DIGITS],
24}
25
26impl Default for Decimal {
27    fn default() -> Self {
28        Self { num_digits: 0, decimal_point: 0, truncated: false, digits: [0; Self::MAX_DIGITS] }
29    }
30}
31
32impl Decimal {
33    /// The maximum number of digits required to unambiguously round a float.
34    ///
35    /// For a double-precision IEEE 754 float, this required 767 digits,
36    /// so we store the max digits + 1.
37    ///
38    /// We can exactly represent a float in radix `b` from radix 2 if
39    /// `b` is divisible by 2. This function calculates the exact number of
40    /// digits required to exactly represent that float.
41    ///
42    /// According to the "Handbook of Floating Point Arithmetic",
43    /// for IEEE754, with emin being the min exponent, p2 being the
44    /// precision, and b being the radix, the number of digits follows as:
45    ///
46    /// `−emin + p2 + ⌊(emin + 1) log(2, b) − log(1 − 2^(−p2), b)⌋`
47    ///
48    /// For f32, this follows as:
49    ///     emin = -126
50    ///     p2 = 24
51    ///
52    /// For f64, this follows as:
53    ///     emin = -1022
54    ///     p2 = 53
55    ///
56    /// In Python:
57    ///     `-emin + p2 + math.floor((emin+ 1)*math.log(2, b)-math.log(1-2**(-p2), b))`
58    pub(super) const MAX_DIGITS: usize = 768;
59    /// The max digits that can be exactly represented in a 64-bit integer.
60    pub(super) const MAX_DIGITS_WITHOUT_OVERFLOW: usize = 19;
61    pub(super) const DECIMAL_POINT_RANGE: i32 = 2047;
62
63    /// Append a digit to the buffer.
64    pub(super) fn try_add_digit(&mut self, digit: u8) {
65        if self.num_digits < Self::MAX_DIGITS {
66            self.digits[self.num_digits] = digit;
67        }
68        self.num_digits += 1;
69    }
70
71    /// Trim trailing zeros from the buffer.
72    pub(super) fn trim(&mut self) {
73        // All of the following calls to `Decimal::trim` can't panic because:
74        //
75        //  1. `parse_decimal` sets `num_digits` to a max of `Decimal::MAX_DIGITS`.
76        //  2. `right_shift` sets `num_digits` to `write_index`, which is bounded by `num_digits`.
77        //  3. `left_shift` `num_digits` to a max of `Decimal::MAX_DIGITS`.
78        //
79        // Trim is only called in `right_shift` and `left_shift`.
80        debug_assert!(self.num_digits <= Self::MAX_DIGITS);
81        while self.num_digits != 0 && self.digits[self.num_digits - 1] == 0 {
82            self.num_digits -= 1;
83        }
84    }
85
86    pub(super) fn round(&self) -> u64 {
87        if self.num_digits == 0 || self.decimal_point < 0 {
88            return 0;
89        } else if self.decimal_point > 18 {
90            return 0xFFFF_FFFF_FFFF_FFFF_u64;
91        }
92        let dp = self.decimal_point as usize;
93        let mut n = 0_u64;
94        for i in 0..dp {
95            n *= 10;
96            if i < self.num_digits {
97                n += self.digits[i] as u64;
98            }
99        }
100        let mut round_up = false;
101        if dp < self.num_digits {
102            round_up = self.digits[dp] >= 5;
103            if self.digits[dp] == 5 && dp + 1 == self.num_digits {
104                round_up = self.truncated || ((dp != 0) && (1 & self.digits[dp - 1] != 0))
105            }
106        }
107        if round_up {
108            n += 1;
109        }
110        n
111    }
112
113    /// Computes decimal * 2^shift.
114    pub(super) fn left_shift(&mut self, shift: usize) {
115        if self.num_digits == 0 {
116            return;
117        }
118        let num_new_digits = number_of_digits_decimal_left_shift(self, shift);
119        let mut read_index = self.num_digits;
120        let mut write_index = self.num_digits + num_new_digits;
121        let mut n = 0_u64;
122        while read_index != 0 {
123            read_index -= 1;
124            write_index -= 1;
125            n += (self.digits[read_index] as u64) << shift;
126            let quotient = n / 10;
127            let remainder = n - (10 * quotient);
128            if write_index < Self::MAX_DIGITS {
129                self.digits[write_index] = remainder as u8;
130            } else if remainder > 0 {
131                self.truncated = true;
132            }
133            n = quotient;
134        }
135        while n > 0 {
136            write_index -= 1;
137            let quotient = n / 10;
138            let remainder = n - (10 * quotient);
139            if write_index < Self::MAX_DIGITS {
140                self.digits[write_index] = remainder as u8;
141            } else if remainder > 0 {
142                self.truncated = true;
143            }
144            n = quotient;
145        }
146        self.num_digits += num_new_digits;
147        if self.num_digits > Self::MAX_DIGITS {
148            self.num_digits = Self::MAX_DIGITS;
149        }
150        self.decimal_point += num_new_digits as i32;
151        self.trim();
152    }
153
154    /// Computes decimal * 2^-shift.
155    pub(super) fn right_shift(&mut self, shift: usize) {
156        let mut read_index = 0;
157        let mut write_index = 0;
158        let mut n = 0_u64;
159        while (n >> shift) == 0 {
160            if read_index < self.num_digits {
161                n = (10 * n) + self.digits[read_index] as u64;
162                read_index += 1;
163            } else if n == 0 {
164                return;
165            } else {
166                while (n >> shift) == 0 {
167                    n *= 10;
168                    read_index += 1;
169                }
170                break;
171            }
172        }
173        self.decimal_point -= read_index as i32 - 1;
174        if self.decimal_point < -Self::DECIMAL_POINT_RANGE {
175            // `self = Self::Default()`, but without the overhead of clearing `digits`.
176            self.num_digits = 0;
177            self.decimal_point = 0;
178            self.truncated = false;
179            return;
180        }
181        let mask = (1_u64 << shift) - 1;
182        while read_index < self.num_digits {
183            let new_digit = (n >> shift) as u8;
184            n = (10 * (n & mask)) + self.digits[read_index] as u64;
185            read_index += 1;
186            self.digits[write_index] = new_digit;
187            write_index += 1;
188        }
189        while n > 0 {
190            let new_digit = (n >> shift) as u8;
191            n = 10 * (n & mask);
192            if write_index < Self::MAX_DIGITS {
193                self.digits[write_index] = new_digit;
194                write_index += 1;
195            } else if new_digit > 0 {
196                self.truncated = true;
197            }
198        }
199        self.num_digits = write_index;
200        self.trim();
201    }
202}
203
204/// Parse a big integer representation of the float as a decimal.
205pub(super) fn parse_decimal(mut s: &[u8]) -> Decimal {
206    let mut d = Decimal::default();
207    let start = s;
208
209    while let Some((&b'0', s_next)) = s.split_first() {
210        s = s_next;
211    }
212
213    s = s.parse_digits(|digit| d.try_add_digit(digit));
214
215    if let Some((b'.', s_next)) = s.split_first() {
216        s = s_next;
217        let first = s;
218        // Skip leading zeros.
219        if d.num_digits == 0 {
220            while let Some((&b'0', s_next)) = s.split_first() {
221                s = s_next;
222            }
223        }
224        while s.len() >= 8 && d.num_digits + 8 < Decimal::MAX_DIGITS {
225            let v = s.read_u64();
226            if !is_8digits(v) {
227                break;
228            }
229            d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030);
230            d.num_digits += 8;
231            s = &s[8..];
232        }
233        s = s.parse_digits(|digit| d.try_add_digit(digit));
234        d.decimal_point = s.len() as i32 - first.len() as i32;
235    }
236    if d.num_digits != 0 {
237        // Ignore the trailing zeros if there are any
238        let mut n_trailing_zeros = 0;
239        for &c in start[..(start.len() - s.len())].iter().rev() {
240            if c == b'0' {
241                n_trailing_zeros += 1;
242            } else if c != b'.' {
243                break;
244            }
245        }
246        d.decimal_point += n_trailing_zeros as i32;
247        d.num_digits -= n_trailing_zeros;
248        d.decimal_point += d.num_digits as i32;
249        if d.num_digits > Decimal::MAX_DIGITS {
250            d.truncated = true;
251            d.num_digits = Decimal::MAX_DIGITS;
252        }
253    }
254    if let Some((&ch, s_next)) = s.split_first() {
255        if ch == b'e' || ch == b'E' {
256            s = s_next;
257            let mut neg_exp = false;
258            if let Some((&ch, s_next)) = s.split_first() {
259                neg_exp = ch == b'-';
260                if ch == b'-' || ch == b'+' {
261                    s = s_next;
262                }
263            }
264            let mut exp_num = 0_i32;
265
266            s.parse_digits(|digit| {
267                if exp_num < 0x10000 {
268                    exp_num = 10 * exp_num + digit as i32;
269                }
270            });
271
272            d.decimal_point += if neg_exp { -exp_num } else { exp_num };
273        }
274    }
275    for i in d.num_digits..Decimal::MAX_DIGITS_WITHOUT_OVERFLOW {
276        d.digits[i] = 0;
277    }
278    d
279}
280
281fn number_of_digits_decimal_left_shift(d: &Decimal, mut shift: usize) -> usize {
282    #[rustfmt::skip]
283    const TABLE: [u16; 65] = [
284        0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817, 0x181D, 0x2024,
285        0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067, 0x3073, 0x3080, 0x388E, 0x389C,
286        0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF, 0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169,
287        0x5180, 0x5998, 0x59B0, 0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B,
288        0x72AA, 0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC, 0x8C02,
289        0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C, 0x051C, 0x051C,
290    ];
291    #[rustfmt::skip]
292    const TABLE_POW5: [u8; 0x051C] = [
293        5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5, 3, 9, 0, 6, 2, 5, 1,
294        9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8, 2, 8, 1, 2, 5, 2, 4, 4, 1, 4, 0, 6, 2, 5,
295        1, 2, 2, 0, 7, 0, 3, 1, 2, 5, 6, 1, 0, 3, 5, 1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2,
296        5, 1, 5, 2, 5, 8, 7, 8, 9, 0, 6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5, 3, 8, 1, 4, 6,
297        9, 7, 2, 6, 5, 6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2, 8, 1, 2, 5, 9, 5, 3, 6, 7, 4, 3, 1,
298        6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3, 7, 1, 5, 8, 2, 0, 3, 1, 2, 5, 2, 3, 8, 4, 1, 8, 5, 7, 9,
299        1, 0, 1, 5, 6, 2, 5, 1, 1, 9, 2, 0, 9, 2, 8, 9, 5, 5, 0, 7, 8, 1, 2, 5, 5, 9, 6, 0, 4, 6,
300        4, 4, 7, 7, 5, 3, 9, 0, 6, 2, 5, 2, 9, 8, 0, 2, 3, 2, 2, 3, 8, 7, 6, 9, 5, 3, 1, 2, 5, 1,
301        4, 9, 0, 1, 1, 6, 1, 1, 9, 3, 8, 4, 7, 6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8, 0, 5, 9, 6, 9, 2,
302        3, 8, 2, 8, 1, 2, 5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4, 6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8, 6,
303        2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5, 7, 0, 3, 1, 2, 5, 9, 3, 1, 3, 2, 2, 5, 7, 4, 6, 1, 5,
304        4, 7, 8, 5, 1, 5, 6, 2, 5, 4, 6, 5, 6, 6, 1, 2, 8, 7, 3, 0, 7, 7, 3, 9, 2, 5, 7, 8, 1, 2,
305        5, 2, 3, 2, 8, 3, 0, 6, 4, 3, 6, 5, 3, 8, 6, 9, 6, 2, 8, 9, 0, 6, 2, 5, 1, 1, 6, 4, 1, 5,
306        3, 2, 1, 8, 2, 6, 9, 3, 4, 8, 1, 4, 4, 5, 3, 1, 2, 5, 5, 8, 2, 0, 7, 6, 6, 0, 9, 1, 3, 4,
307        6, 7, 4, 0, 7, 2, 2, 6, 5, 6, 2, 5, 2, 9, 1, 0, 3, 8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6,
308        1, 3, 2, 8, 1, 2, 5, 1, 4, 5, 5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8, 0, 6, 6, 4, 0,
309        6, 2, 5, 7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9, 0, 3, 3, 2, 0, 3, 1, 2, 5, 3,
310        6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2, 9, 5, 1, 6, 6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8,
311        9, 8, 9, 4, 0, 3, 5, 4, 5, 8, 5, 6, 4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4,
312        7, 0, 1, 7, 7, 2, 9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7, 3, 5,
313        0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5, 2, 2, 7, 3, 7, 3, 6, 7, 5,
314        4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5, 9, 7, 6, 5, 6, 2, 5, 1, 1, 3, 6, 8, 6, 8, 3, 7,
315        7, 2, 1, 6, 1, 6, 0, 2, 9, 7, 3, 9, 3, 7, 9, 8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8,
316        8, 6, 0, 8, 0, 8, 0, 1, 4, 8, 6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5, 2, 8, 4, 2, 1, 7, 0,
317        9, 4, 3, 0, 4, 0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4, 9, 7, 0, 7, 0, 3, 1, 2, 5, 1, 4, 2, 1, 0,
318        8, 5, 4, 7, 1, 5, 2, 0, 2, 0, 0, 3, 7, 1, 7, 4, 2, 2, 4, 8, 5, 3, 5, 1, 5, 6, 2, 5, 7, 1,
319        0, 5, 4, 2, 7, 3, 5, 7, 6, 0, 1, 0, 0, 1, 8, 5, 8, 7, 1, 1, 2, 4, 2, 6, 7, 5, 7, 8, 1, 2,
320        5, 3, 5, 5, 2, 7, 1, 3, 6, 7, 8, 8, 0, 0, 5, 0, 0, 9, 2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8,
321        9, 0, 6, 2, 5, 1, 7, 7, 6, 3, 5, 6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4, 6, 7, 7, 8, 1, 0,
322        6, 6, 8, 9, 4, 5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 4, 1, 9, 7, 0, 0, 1, 2, 5, 2, 3, 2, 3, 3,
323        8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5, 6, 2, 5, 4, 4, 4, 0, 8, 9, 2, 0, 9, 8, 5, 0, 0, 6, 2,
324        6, 1, 6, 1, 6, 9, 4, 5, 2, 6, 6, 7, 2, 3, 6, 3, 2, 8, 1, 2, 5, 2, 2, 2, 0, 4, 4, 6, 0, 4,
325        9, 2, 5, 0, 3, 1, 3, 0, 8, 0, 8, 4, 7, 2, 6, 3, 3, 3, 6, 1, 8, 1, 6, 4, 0, 6, 2, 5, 1, 1,
326        1, 0, 2, 2, 3, 0, 2, 4, 6, 2, 5, 1, 5, 6, 5, 4, 0, 4, 2, 3, 6, 3, 1, 6, 6, 8, 0, 9, 0, 8,
327        2, 0, 3, 1, 2, 5, 5, 5, 5, 1, 1, 1, 5, 1, 2, 3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5,
328        8, 3, 4, 0, 4, 5, 4, 1, 0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5, 6, 2, 8, 9, 1,
329        3, 5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8, 1, 2, 5, 1, 3, 8, 7, 7, 7, 8,
330        7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9, 5, 3, 9, 5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0,
331        6, 2, 5, 6, 9, 3, 8, 8, 9, 3, 9, 0, 3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2,
332        5, 5, 6, 7, 6, 2, 6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3, 6, 1, 4, 1,
333        8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7, 6, 5, 6, 2, 5, 1, 7, 3, 4, 7, 2,
334        3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9, 4, 4, 1, 1, 9, 2, 4, 4, 8, 1, 3, 9, 1, 9, 0, 6, 7, 3,
335        8, 2, 8, 1, 2, 5, 8, 6, 7, 3, 6, 1, 7, 3, 7, 9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9, 6, 2,
336        2, 4, 0, 6, 9, 5, 9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5,
337    ];
338
339    shift &= 63;
340    let x_a = TABLE[shift];
341    let x_b = TABLE[shift + 1];
342    let num_new_digits = (x_a >> 11) as _;
343    let pow5_a = (0x7FF & x_a) as usize;
344    let pow5_b = (0x7FF & x_b) as usize;
345    let pow5 = &TABLE_POW5[pow5_a..];
346    for (i, &p5) in pow5.iter().enumerate().take(pow5_b - pow5_a) {
347        if i >= d.num_digits {
348            return num_new_digits - 1;
349        } else if d.digits[i] == p5 {
350            continue;
351        } else if d.digits[i] < p5 {
352            return num_new_digits - 1;
353        } else {
354            return num_new_digits;
355        }
356    }
357    num_new_digits
358}