core/num/dec2flt/
decimal_seq.rs

1//! Arbitrary-precision decimal type used by fallback algorithms.
2//!
3//! This is only used if the fast-path (native floats) and
4//! the Eisel-Lemire algorithm are unable to unambiguously
5//! determine the float.
6//!
7//! The technique used is "Simple Decimal Conversion", developed
8//! by Nigel Tao and Ken Thompson. A detailed description of the
9//! algorithm can be found in "ParseNumberF64 by Simple Decimal Conversion",
10//! available online: <https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html>.
11
12use crate::num::dec2flt::common::{ByteSlice, is_8digits};
13
14/// A decimal floating-point number, represented as a sequence of decimal digits.
15#[derive(Clone, Debug, PartialEq)]
16pub struct DecimalSeq {
17    /// The number of significant digits in the decimal.
18    pub num_digits: usize,
19    /// The offset of the decimal point in the significant digits.
20    pub decimal_point: i32,
21    /// If the number of significant digits stored in the decimal is truncated.
22    pub truncated: bool,
23    /// Buffer of the raw digits, in the range [0, 9].
24    pub digits: [u8; Self::MAX_DIGITS],
25}
26
27impl Default for DecimalSeq {
28    fn default() -> Self {
29        Self { num_digits: 0, decimal_point: 0, truncated: false, digits: [0; Self::MAX_DIGITS] }
30    }
31}
32
33impl DecimalSeq {
34    /// The maximum number of digits required to unambiguously round up to a 64-bit float.
35    ///
36    /// For an IEEE 754 binary64 float, this required 767 digits. So we store the max digits + 1.
37    ///
38    /// We can exactly represent a float in radix `b` from radix 2 if
39    /// `b` is divisible by 2. This function calculates the exact number of
40    /// digits required to exactly represent that float.
41    ///
42    /// According to the "Handbook of Floating Point Arithmetic",
43    /// for IEEE754, with `emin` being the min exponent, `p2` being the
44    /// precision, and `b` being the radix, the number of digits follows as:
45    ///
46    /// `−emin + p2 + ⌊(emin + 1) log(2, b) − log(1 − 2^(−p2), b)⌋`
47    ///
48    /// For f32, this follows as:
49    ///     emin = -126
50    ///     p2 = 24
51    ///
52    /// For f64, this follows as:
53    ///     emin = -1022
54    ///     p2 = 53
55    ///
56    /// In Python:
57    ///     `-emin + p2 + math.floor((emin+ 1)*math.log(2, b)-math.log(1-2**(-p2), b))`
58    pub const MAX_DIGITS: usize = 768;
59
60    /// The max decimal digits that can be exactly represented in a 64-bit integer.
61    pub(super) const MAX_DIGITS_WITHOUT_OVERFLOW: usize = 19;
62    pub(super) const DECIMAL_POINT_RANGE: i32 = 2047;
63
64    /// Append a digit to the buffer if it fits.
65    // FIXME(tgross35): it may be better for this to return an option
66    // FIXME(tgross35): incrementing the digit counter even if we don't push anything
67    // seems incorrect.
68    pub(super) fn try_add_digit(&mut self, digit: u8) {
69        if self.num_digits < Self::MAX_DIGITS {
70            self.digits[self.num_digits] = digit;
71        }
72        self.num_digits += 1;
73    }
74
75    /// Trim trailing zeros from the buffer.
76    // FIXME(tgross35): this could be `.rev().position()` if perf is okay
77    pub fn trim(&mut self) {
78        // All of the following calls to `DecimalSeq::trim` can't panic because:
79        //
80        //  1. `parse_decimal` sets `num_digits` to a max of `DecimalSeq::MAX_DIGITS`.
81        //  2. `right_shift` sets `num_digits` to `write_index`, which is bounded by `num_digits`.
82        //  3. `left_shift` `num_digits` to a max of `DecimalSeq::MAX_DIGITS`.
83        //
84        // Trim is only called in `right_shift` and `left_shift`.
85        debug_assert!(self.num_digits <= Self::MAX_DIGITS);
86        while self.num_digits != 0 && self.digits[self.num_digits - 1] == 0 {
87            self.num_digits -= 1;
88        }
89    }
90
91    pub(super) fn round(&self) -> u64 {
92        if self.num_digits == 0 || self.decimal_point < 0 {
93            return 0;
94        } else if self.decimal_point >= Self::MAX_DIGITS_WITHOUT_OVERFLOW as i32 {
95            return 0xFFFF_FFFF_FFFF_FFFF_u64;
96        }
97
98        let dp = self.decimal_point as usize;
99        let mut n = 0_u64;
100
101        for i in 0..dp {
102            n *= 10;
103            if i < self.num_digits {
104                n += self.digits[i] as u64;
105            }
106        }
107
108        let mut round_up = false;
109
110        if dp < self.num_digits {
111            round_up = self.digits[dp] >= 5;
112            if self.digits[dp] == 5 && dp + 1 == self.num_digits {
113                round_up = self.truncated || ((dp != 0) && (1 & self.digits[dp - 1] != 0))
114            }
115        }
116
117        if round_up {
118            n += 1;
119        }
120        n
121    }
122
123    /// Computes decimal * 2^shift.
124    pub(super) fn left_shift(&mut self, shift: usize) {
125        if self.num_digits == 0 {
126            return;
127        }
128        let num_new_digits = number_of_digits_decimal_left_shift(self, shift);
129        let mut read_index = self.num_digits;
130        let mut write_index = self.num_digits + num_new_digits;
131        let mut n = 0_u64;
132
133        while read_index != 0 {
134            read_index -= 1;
135            write_index -= 1;
136            n += (self.digits[read_index] as u64) << shift;
137            let quotient = n / 10;
138            let remainder = n - (10 * quotient);
139            if write_index < Self::MAX_DIGITS {
140                self.digits[write_index] = remainder as u8;
141            } else if remainder > 0 {
142                self.truncated = true;
143            }
144            n = quotient;
145        }
146
147        while n > 0 {
148            write_index -= 1;
149            let quotient = n / 10;
150            let remainder = n - (10 * quotient);
151            if write_index < Self::MAX_DIGITS {
152                self.digits[write_index] = remainder as u8;
153            } else if remainder > 0 {
154                self.truncated = true;
155            }
156            n = quotient;
157        }
158
159        self.num_digits += num_new_digits;
160
161        if self.num_digits > Self::MAX_DIGITS {
162            self.num_digits = Self::MAX_DIGITS;
163        }
164
165        self.decimal_point += num_new_digits as i32;
166        self.trim();
167    }
168
169    /// Computes decimal * 2^-shift.
170    pub(super) fn right_shift(&mut self, shift: usize) {
171        let mut read_index = 0;
172        let mut write_index = 0;
173        let mut n = 0_u64;
174        while (n >> shift) == 0 {
175            if read_index < self.num_digits {
176                n = (10 * n) + self.digits[read_index] as u64;
177                read_index += 1;
178            } else if n == 0 {
179                return;
180            } else {
181                while (n >> shift) == 0 {
182                    n *= 10;
183                    read_index += 1;
184                }
185                break;
186            }
187        }
188        self.decimal_point -= read_index as i32 - 1;
189        if self.decimal_point < -Self::DECIMAL_POINT_RANGE {
190            // `self = Self::Default()`, but without the overhead of clearing `digits`.
191            self.num_digits = 0;
192            self.decimal_point = 0;
193            self.truncated = false;
194            return;
195        }
196        let mask = (1_u64 << shift) - 1;
197        while read_index < self.num_digits {
198            let new_digit = (n >> shift) as u8;
199            n = (10 * (n & mask)) + self.digits[read_index] as u64;
200            read_index += 1;
201            self.digits[write_index] = new_digit;
202            write_index += 1;
203        }
204        while n > 0 {
205            let new_digit = (n >> shift) as u8;
206            n = 10 * (n & mask);
207            if write_index < Self::MAX_DIGITS {
208                self.digits[write_index] = new_digit;
209                write_index += 1;
210            } else if new_digit > 0 {
211                self.truncated = true;
212            }
213        }
214        self.num_digits = write_index;
215        self.trim();
216    }
217}
218
219/// Parse a big integer representation of the float as a decimal.
220pub fn parse_decimal_seq(mut s: &[u8]) -> DecimalSeq {
221    let mut d = DecimalSeq::default();
222    let start = s;
223
224    while let Some((&b'0', s_next)) = s.split_first() {
225        s = s_next;
226    }
227
228    s = s.parse_digits(|digit| d.try_add_digit(digit));
229
230    if let Some((b'.', s_next)) = s.split_first() {
231        s = s_next;
232        let first = s;
233        // Skip leading zeros.
234        if d.num_digits == 0 {
235            while let Some((&b'0', s_next)) = s.split_first() {
236                s = s_next;
237            }
238        }
239        while s.len() >= 8 && d.num_digits + 8 < DecimalSeq::MAX_DIGITS {
240            let v = s.read_u64();
241            if !is_8digits(v) {
242                break;
243            }
244            d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030);
245            d.num_digits += 8;
246            s = &s[8..];
247        }
248        s = s.parse_digits(|digit| d.try_add_digit(digit));
249        d.decimal_point = s.len() as i32 - first.len() as i32;
250    }
251
252    if d.num_digits != 0 {
253        // Ignore the trailing zeros if there are any
254        let mut n_trailing_zeros = 0;
255        for &c in start[..(start.len() - s.len())].iter().rev() {
256            if c == b'0' {
257                n_trailing_zeros += 1;
258            } else if c != b'.' {
259                break;
260            }
261        }
262        d.decimal_point += n_trailing_zeros as i32;
263        d.num_digits -= n_trailing_zeros;
264        d.decimal_point += d.num_digits as i32;
265        if d.num_digits > DecimalSeq::MAX_DIGITS {
266            d.truncated = true;
267            d.num_digits = DecimalSeq::MAX_DIGITS;
268        }
269    }
270
271    if let Some((&ch, s_next)) = s.split_first() {
272        if ch == b'e' || ch == b'E' {
273            s = s_next;
274            let mut neg_exp = false;
275            if let Some((&ch, s_next)) = s.split_first() {
276                neg_exp = ch == b'-';
277                if ch == b'-' || ch == b'+' {
278                    s = s_next;
279                }
280            }
281            let mut exp_num = 0_i32;
282
283            s.parse_digits(|digit| {
284                if exp_num < 0x10000 {
285                    exp_num = 10 * exp_num + digit as i32;
286                }
287            });
288
289            d.decimal_point += if neg_exp { -exp_num } else { exp_num };
290        }
291    }
292
293    for i in d.num_digits..DecimalSeq::MAX_DIGITS_WITHOUT_OVERFLOW {
294        d.digits[i] = 0;
295    }
296
297    d
298}
299
300fn number_of_digits_decimal_left_shift(d: &DecimalSeq, mut shift: usize) -> usize {
301    #[rustfmt::skip]
302    const TABLE: [u16; 65] = [
303        0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817, 0x181D, 0x2024,
304        0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067, 0x3073, 0x3080, 0x388E, 0x389C,
305        0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF, 0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169,
306        0x5180, 0x5998, 0x59B0, 0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B,
307        0x72AA, 0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC, 0x8C02,
308        0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C, 0x051C, 0x051C,
309    ];
310    #[rustfmt::skip]
311    const TABLE_POW5: [u8; 0x051C] = [
312        5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5, 3, 9, 0, 6, 2, 5, 1,
313        9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8, 2, 8, 1, 2, 5, 2, 4, 4, 1, 4, 0, 6, 2, 5,
314        1, 2, 2, 0, 7, 0, 3, 1, 2, 5, 6, 1, 0, 3, 5, 1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2,
315        5, 1, 5, 2, 5, 8, 7, 8, 9, 0, 6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5, 3, 8, 1, 4, 6,
316        9, 7, 2, 6, 5, 6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2, 8, 1, 2, 5, 9, 5, 3, 6, 7, 4, 3, 1,
317        6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3, 7, 1, 5, 8, 2, 0, 3, 1, 2, 5, 2, 3, 8, 4, 1, 8, 5, 7, 9,
318        1, 0, 1, 5, 6, 2, 5, 1, 1, 9, 2, 0, 9, 2, 8, 9, 5, 5, 0, 7, 8, 1, 2, 5, 5, 9, 6, 0, 4, 6,
319        4, 4, 7, 7, 5, 3, 9, 0, 6, 2, 5, 2, 9, 8, 0, 2, 3, 2, 2, 3, 8, 7, 6, 9, 5, 3, 1, 2, 5, 1,
320        4, 9, 0, 1, 1, 6, 1, 1, 9, 3, 8, 4, 7, 6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8, 0, 5, 9, 6, 9, 2,
321        3, 8, 2, 8, 1, 2, 5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4, 6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8, 6,
322        2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5, 7, 0, 3, 1, 2, 5, 9, 3, 1, 3, 2, 2, 5, 7, 4, 6, 1, 5,
323        4, 7, 8, 5, 1, 5, 6, 2, 5, 4, 6, 5, 6, 6, 1, 2, 8, 7, 3, 0, 7, 7, 3, 9, 2, 5, 7, 8, 1, 2,
324        5, 2, 3, 2, 8, 3, 0, 6, 4, 3, 6, 5, 3, 8, 6, 9, 6, 2, 8, 9, 0, 6, 2, 5, 1, 1, 6, 4, 1, 5,
325        3, 2, 1, 8, 2, 6, 9, 3, 4, 8, 1, 4, 4, 5, 3, 1, 2, 5, 5, 8, 2, 0, 7, 6, 6, 0, 9, 1, 3, 4,
326        6, 7, 4, 0, 7, 2, 2, 6, 5, 6, 2, 5, 2, 9, 1, 0, 3, 8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6,
327        1, 3, 2, 8, 1, 2, 5, 1, 4, 5, 5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8, 0, 6, 6, 4, 0,
328        6, 2, 5, 7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9, 0, 3, 3, 2, 0, 3, 1, 2, 5, 3,
329        6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2, 9, 5, 1, 6, 6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8,
330        9, 8, 9, 4, 0, 3, 5, 4, 5, 8, 5, 6, 4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4,
331        7, 0, 1, 7, 7, 2, 9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7, 3, 5,
332        0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5, 2, 2, 7, 3, 7, 3, 6, 7, 5,
333        4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5, 9, 7, 6, 5, 6, 2, 5, 1, 1, 3, 6, 8, 6, 8, 3, 7,
334        7, 2, 1, 6, 1, 6, 0, 2, 9, 7, 3, 9, 3, 7, 9, 8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8,
335        8, 6, 0, 8, 0, 8, 0, 1, 4, 8, 6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5, 2, 8, 4, 2, 1, 7, 0,
336        9, 4, 3, 0, 4, 0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4, 9, 7, 0, 7, 0, 3, 1, 2, 5, 1, 4, 2, 1, 0,
337        8, 5, 4, 7, 1, 5, 2, 0, 2, 0, 0, 3, 7, 1, 7, 4, 2, 2, 4, 8, 5, 3, 5, 1, 5, 6, 2, 5, 7, 1,
338        0, 5, 4, 2, 7, 3, 5, 7, 6, 0, 1, 0, 0, 1, 8, 5, 8, 7, 1, 1, 2, 4, 2, 6, 7, 5, 7, 8, 1, 2,
339        5, 3, 5, 5, 2, 7, 1, 3, 6, 7, 8, 8, 0, 0, 5, 0, 0, 9, 2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8,
340        9, 0, 6, 2, 5, 1, 7, 7, 6, 3, 5, 6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4, 6, 7, 7, 8, 1, 0,
341        6, 6, 8, 9, 4, 5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 4, 1, 9, 7, 0, 0, 1, 2, 5, 2, 3, 2, 3, 3,
342        8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5, 6, 2, 5, 4, 4, 4, 0, 8, 9, 2, 0, 9, 8, 5, 0, 0, 6, 2,
343        6, 1, 6, 1, 6, 9, 4, 5, 2, 6, 6, 7, 2, 3, 6, 3, 2, 8, 1, 2, 5, 2, 2, 2, 0, 4, 4, 6, 0, 4,
344        9, 2, 5, 0, 3, 1, 3, 0, 8, 0, 8, 4, 7, 2, 6, 3, 3, 3, 6, 1, 8, 1, 6, 4, 0, 6, 2, 5, 1, 1,
345        1, 0, 2, 2, 3, 0, 2, 4, 6, 2, 5, 1, 5, 6, 5, 4, 0, 4, 2, 3, 6, 3, 1, 6, 6, 8, 0, 9, 0, 8,
346        2, 0, 3, 1, 2, 5, 5, 5, 5, 1, 1, 1, 5, 1, 2, 3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5,
347        8, 3, 4, 0, 4, 5, 4, 1, 0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5, 6, 2, 8, 9, 1,
348        3, 5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8, 1, 2, 5, 1, 3, 8, 7, 7, 7, 8,
349        7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9, 5, 3, 9, 5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0,
350        6, 2, 5, 6, 9, 3, 8, 8, 9, 3, 9, 0, 3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2,
351        5, 5, 6, 7, 6, 2, 6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3, 6, 1, 4, 1,
352        8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7, 6, 5, 6, 2, 5, 1, 7, 3, 4, 7, 2,
353        3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9, 4, 4, 1, 1, 9, 2, 4, 4, 8, 1, 3, 9, 1, 9, 0, 6, 7, 3,
354        8, 2, 8, 1, 2, 5, 8, 6, 7, 3, 6, 1, 7, 3, 7, 9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9, 6, 2,
355        2, 4, 0, 6, 9, 5, 9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5,
356    ];
357
358    shift &= 63;
359    let x_a = TABLE[shift];
360    let x_b = TABLE[shift + 1];
361    let num_new_digits = (x_a >> 11) as _;
362    let pow5_a = (0x7FF & x_a) as usize;
363    let pow5_b = (0x7FF & x_b) as usize;
364    let pow5 = &TABLE_POW5[pow5_a..];
365
366    for (i, &p5) in pow5.iter().enumerate().take(pow5_b - pow5_a) {
367        if i >= d.num_digits {
368            return num_new_digits - 1;
369        } else if d.digits[i] == p5 {
370            continue;
371        } else if d.digits[i] < p5 {
372            return num_new_digits - 1;
373        } else {
374            return num_new_digits;
375        }
376    }
377
378    num_new_digits
379}
core/num/dec2flt/decimal_seq.rs

core/num/dec2flt/
decimal_seq.rs