core/num/dec2flt/
float.rs

1//! Helper trait for generic float types.
2
3use crate::fmt::{Debug, LowerExp};
4use crate::num::FpCategory;
5use crate::ops::{Add, Div, Mul, Neg};
6
/// Shared abstraction over `f32` and `f64` so that the conversion code does not have to be
/// written twice.
///
/// See the parent module's doc comment for why this is necessary.
///
/// Should **never ever** be implemented for other types or be used outside the dec2flt module.
#[doc(hidden)]
pub trait RawFloat:
    Sized
    + Copy
    + Clone
    + Default
    + PartialEq
    + PartialOrd
    + Debug
    + LowerExp
    + Add<Output = Self>
    + Mul<Output = Self>
    + Div<Output = Self>
    + Neg<Output = Self>
{
    /// Positive infinity of the implementing type.
    const INFINITY: Self;
    /// Negative infinity of the implementing type.
    const NEG_INFINITY: Self;
    /// A NaN value of the implementing type.
    const NAN: Self;
    /// The negation of [`Self::NAN`].
    const NEG_NAN: Self;

    /// The number of bits in the significand, *excluding* the hidden bit.
    const MANTISSA_EXPLICIT_BITS: usize;

    // Round-to-even only happens for negative values of q
    // when q ≥ −4 in the 64-bit case and when q ≥ −17 in
    // the 32-bit case.
    //
    // When q ≥ 0, we have that 5^q ≤ 2m+1. In the 64-bit case, we
    // have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case, we have
    // 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
    //
    // When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
    // so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
    // or 2m+1 > 2^24 (32-bit case). Hence, we must have 2^53×5^−q < 2^64
    // (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
    // or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bit case).
    //
    // Thus we only need to round ties to even when q ∈ [−4, 23]
    // (in the 64-bit case) or q ∈ [−17, 10] (in the 32-bit case).
    // In both cases, the power of five (5^|q|) fits in a 64-bit word.

    /// Smallest decimal exponent `q` for which ties may have to be rounded to even.
    const MIN_EXPONENT_ROUND_TO_EVEN: i32;
    /// Largest decimal exponent `q` for which ties may have to be rounded to even.
    const MAX_EXPONENT_ROUND_TO_EVEN: i32;

    /// Minimum exponent for a fast-path case, or
    /// `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`.
    const MIN_EXPONENT_FAST_PATH: i64;

    /// Maximum exponent for a fast-path case, or
    /// `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`.
    const MAX_EXPONENT_FAST_PATH: i64;

    /// Maximum exponent that can be represented for a disguised-fast path case.
    /// This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋`.
    const MAX_EXPONENT_DISGUISED_FAST_PATH: i64;

    /// Minimum exponent value, `-(1 << (EXP_BITS - 1)) + 1`.
    const MINIMUM_EXPONENT: i32;

    /// Largest exponent value, `(1 << EXP_BITS) - 1`.
    const INFINITE_POWER: i32;

    /// Index (in bits) of the sign.
    const SIGN_INDEX: usize;

    /// Smallest decimal exponent for a non-zero value.
    const SMALLEST_POWER_OF_TEN: i32;

    /// Largest decimal exponent for a non-infinite value.
    const LARGEST_POWER_OF_TEN: i32;

    /// Maximum mantissa for the fast path (`1 << 53` for `f64`).
    ///
    /// `2 << MANTISSA_EXPLICIT_BITS` is the same as `1 << (MANTISSA_EXPLICIT_BITS + 1)`,
    /// i.e. the explicit bits plus the hidden bit.
    const MAX_MANTISSA_FAST_PATH: u64 = 2_u64 << Self::MANTISSA_EXPLICIT_BITS;

    /// Converts an integer into a float through an `as` cast.
    ///
    /// This is only called in the fast-path algorithm, where the value is always
    /// `<= Self::MAX_MANTISSA_FAST_PATH`, and therefore the cast will not lose precision.
    fn from_u64(v: u64) -> Self;

    /// Performs a raw transmutation from an integer.
    fn from_u64_bits(v: u64) -> Self;

    /// Gets a small power-of-ten for fast-path multiplication.
    fn pow10_fast_path(exponent: usize) -> Self;

    /// Returns the category that this number falls into.
    fn classify(self) -> FpCategory;

    /// Returns the mantissa, exponent and sign as integers.
    fn integer_decode(self) -> (u64, i16, i8);
}
102
103impl RawFloat for f32 {
104    const INFINITY: Self = f32::INFINITY;
105    const NEG_INFINITY: Self = f32::NEG_INFINITY;
106    const NAN: Self = f32::NAN;
107    const NEG_NAN: Self = -f32::NAN;
108
109    const MANTISSA_EXPLICIT_BITS: usize = 23;
110    const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17;
111    const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10;
112    const MIN_EXPONENT_FAST_PATH: i64 = -10; // assuming FLT_EVAL_METHOD = 0
113    const MAX_EXPONENT_FAST_PATH: i64 = 10;
114    const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 17;
115    const MINIMUM_EXPONENT: i32 = -127;
116    const INFINITE_POWER: i32 = 0xFF;
117    const SIGN_INDEX: usize = 31;
118    const SMALLEST_POWER_OF_TEN: i32 = -65;
119    const LARGEST_POWER_OF_TEN: i32 = 38;
120
121    #[inline]
122    fn from_u64(v: u64) -> Self {
123        debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
124        v as _
125    }
126
127    #[inline]
128    fn from_u64_bits(v: u64) -> Self {
129        f32::from_bits((v & 0xFFFFFFFF) as u32)
130    }
131
132    fn pow10_fast_path(exponent: usize) -> Self {
133        #[allow(clippy::use_self)]
134        const TABLE: [f32; 16] =
135            [1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 0., 0., 0., 0., 0.];
136        TABLE[exponent & 15]
137    }
138
139    /// Returns the mantissa, exponent and sign as integers.
140    fn integer_decode(self) -> (u64, i16, i8) {
141        let bits = self.to_bits();
142        let sign: i8 = if bits >> 31 == 0 { 1 } else { -1 };
143        let mut exponent: i16 = ((bits >> 23) & 0xff) as i16;
144        let mantissa =
145            if exponent == 0 { (bits & 0x7fffff) << 1 } else { (bits & 0x7fffff) | 0x800000 };
146        // Exponent bias + mantissa shift
147        exponent -= 127 + 23;
148        (mantissa as u64, exponent, sign)
149    }
150
151    fn classify(self) -> FpCategory {
152        self.classify()
153    }
154}
155
156impl RawFloat for f64 {
157    const INFINITY: Self = f64::INFINITY;
158    const NEG_INFINITY: Self = f64::NEG_INFINITY;
159    const NAN: Self = f64::NAN;
160    const NEG_NAN: Self = -f64::NAN;
161
162    const MANTISSA_EXPLICIT_BITS: usize = 52;
163    const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -4;
164    const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 23;
165    const MIN_EXPONENT_FAST_PATH: i64 = -22; // assuming FLT_EVAL_METHOD = 0
166    const MAX_EXPONENT_FAST_PATH: i64 = 22;
167    const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 37;
168    const MINIMUM_EXPONENT: i32 = -1023;
169    const INFINITE_POWER: i32 = 0x7FF;
170    const SIGN_INDEX: usize = 63;
171    const SMALLEST_POWER_OF_TEN: i32 = -342;
172    const LARGEST_POWER_OF_TEN: i32 = 308;
173
174    #[inline]
175    fn from_u64(v: u64) -> Self {
176        debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
177        v as _
178    }
179
180    #[inline]
181    fn from_u64_bits(v: u64) -> Self {
182        f64::from_bits(v)
183    }
184
185    fn pow10_fast_path(exponent: usize) -> Self {
186        const TABLE: [f64; 32] = [
187            1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
188            1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 0., 0., 0., 0., 0., 0., 0., 0., 0.,
189        ];
190        TABLE[exponent & 31]
191    }
192
193    /// Returns the mantissa, exponent and sign as integers.
194    fn integer_decode(self) -> (u64, i16, i8) {
195        let bits = self.to_bits();
196        let sign: i8 = if bits >> 63 == 0 { 1 } else { -1 };
197        let mut exponent: i16 = ((bits >> 52) & 0x7ff) as i16;
198        let mantissa = if exponent == 0 {
199            (bits & 0xfffffffffffff) << 1
200        } else {
201            (bits & 0xfffffffffffff) | 0x10000000000000
202        };
203        // Exponent bias + mantissa shift
204        exponent -= 1023 + 52;
205        (mantissa, exponent, sign)
206    }
207
208    fn classify(self) -> FpCategory {
209        self.classify()
210    }
211}