core/num/dec2flt/
slow.rs

1//! Slow, fallback algorithm for cases the Eisel-Lemire algorithm cannot round.
2
3use crate::num::dec2flt::common::BiasedFp;
4use crate::num::dec2flt::decimal_seq::{DecimalSeq, parse_decimal_seq};
5use crate::num::dec2flt::float::RawFloat;
6
7/// Parse the significant digits and biased, binary exponent of a float.
8///
9/// This is a fallback algorithm that uses a big-integer representation
10/// of the float, and therefore is considerably slower than faster
11/// approximations. However, it will always determine how to round
12/// the significant digits to the nearest machine float, allowing
13/// use to handle near half-way cases.
14///
15/// Near half-way cases are halfway between two consecutive machine floats.
16/// For example, the float `16777217.0` has a bitwise representation of
17/// `100000000000000000000000 1`. Rounding to a single-precision float,
18/// the trailing `1` is truncated. Using round-nearest, tie-even, any
19/// value above `16777217.0` must be rounded up to `16777218.0`, while
20/// any value before or equal to `16777217.0` must be rounded down
21/// to `16777216.0`. These near-halfway conversions therefore may require
22/// a large number of digits to unambiguously determine how to round.
23///
24/// The algorithms described here are based on "Processing Long Numbers Quickly",
25/// available here: <https://arxiv.org/pdf/2101.11408.pdf#section.11>.
26pub(crate) fn parse_long_mantissa<F: RawFloat>(s: &[u8]) -> BiasedFp {
27    const MAX_SHIFT: usize = 60;
28    const NUM_POWERS: usize = 19;
29    const POWERS: [u8; 19] =
30        [0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39, 43, 46, 49, 53, 56, 59];
31
32    let get_shift = |n| {
33        if n < NUM_POWERS { POWERS[n] as usize } else { MAX_SHIFT }
34    };
35
36    let fp_zero = BiasedFp::zero_pow2(0);
37    let fp_inf = BiasedFp::zero_pow2(F::INFINITE_POWER);
38
39    let mut d = parse_decimal_seq(s);
40
41    // Short-circuit if the value can only be a literal 0 or infinity.
42    if d.num_digits == 0 || d.decimal_point < -324 {
43        return fp_zero;
44    } else if d.decimal_point >= 310 {
45        return fp_inf;
46    }
47    let mut exp2 = 0_i32;
48    // Shift right toward (1/2 ... 1].
49    while d.decimal_point > 0 {
50        let n = d.decimal_point as usize;
51        let shift = get_shift(n);
52        d.right_shift(shift);
53        if d.decimal_point < -DecimalSeq::DECIMAL_POINT_RANGE {
54            return fp_zero;
55        }
56        exp2 += shift as i32;
57    }
58    // Shift left toward (1/2 ... 1].
59    while d.decimal_point <= 0 {
60        let shift = if d.decimal_point == 0 {
61            match d.digits[0] {
62                digit if digit >= 5 => break,
63                0 | 1 => 2,
64                _ => 1,
65            }
66        } else {
67            get_shift((-d.decimal_point) as _)
68        };
69        d.left_shift(shift);
70        if d.decimal_point > DecimalSeq::DECIMAL_POINT_RANGE {
71            return fp_inf;
72        }
73        exp2 -= shift as i32;
74    }
75    // We are now in the range [1/2 ... 1] but the binary format uses [1 ... 2].
76    exp2 -= 1;
77    while F::EXP_MIN > exp2 {
78        let mut n = (F::EXP_MIN - exp2) as usize;
79        if n > MAX_SHIFT {
80            n = MAX_SHIFT;
81        }
82        d.right_shift(n);
83        exp2 += n as i32;
84    }
85    if (exp2 - F::EXP_MIN + 1) >= F::INFINITE_POWER {
86        return fp_inf;
87    }
88    // Shift the decimal to the hidden bit, and then round the value
89    // to get the high mantissa+1 bits.
90    d.left_shift(F::SIG_BITS as usize + 1);
91    let mut mantissa = d.round();
92    if mantissa >= (1_u64 << (F::SIG_BITS + 1)) {
93        // Rounding up overflowed to the carry bit, need to
94        // shift back to the hidden bit.
95        d.right_shift(1);
96        exp2 += 1;
97        mantissa = d.round();
98        if (exp2 - F::EXP_MIN + 1) >= F::INFINITE_POWER {
99            return fp_inf;
100        }
101    }
102    let mut power2 = exp2 - F::EXP_MIN + 1;
103    if mantissa < (1_u64 << F::SIG_BITS) {
104        power2 -= 1;
105    }
106    // Zero out all the bits above the explicit mantissa bits.
107    mantissa &= (1_u64 << F::SIG_BITS) - 1;
108    BiasedFp { m: mantissa, p_biased: power2 }
109}