core/num/flt2dec/strategy/
grisu.rs

Help
1//! Rust adaptation of the Grisu3 algorithm described in "Printing Floating-Point Numbers Quickly
2//! and Accurately with Integers"[^1]. It uses about 1KB of precomputed table, and in turn, it's
3//! very quick for most inputs.
4//!
5//! [^1]: Florian Loitsch. 2010. Printing floating-point numbers quickly and
6//!   accurately with integers. SIGPLAN Not. 45, 6 (June 2010), 233-243.
7
8use crate::mem::MaybeUninit;
9use crate::num::diy_float::Fp;
10use crate::num::flt2dec::{Decoded, MAX_SIG_DIGITS, round_up};
11
12// see the comments in `format_shortest_opt` for the rationale.
13#[doc(hidden)]
14pub const ALPHA: i16 = -60;
15#[doc(hidden)]
16pub const GAMMA: i16 = -32;
17
18/*
19# the following Python code generates this table:
20for i in xrange(-308, 333, 8):
21    if i >= 0: f = 10**i; e = 0
22    else: f = 2**(80-4*i) // 10**-i; e = 4 * i - 80
23    l = f.bit_length()
24    f = ((f << 64 >> (l-1)) + 1) >> 1; e += l - 64
25    print '    (%#018x, %5d, %4d),' % (f, e, i)
26*/
27
28#[doc(hidden)]
29pub static CACHED_POW10: [(u64, i16, i16); 81] = [
30    // (f, e, k)
31    (0xe61acf033d1a45df, -1087, -308),
32    (0xab70fe17c79ac6ca, -1060, -300),
33    (0xff77b1fcbebcdc4f, -1034, -292),
34    (0xbe5691ef416bd60c, -1007, -284),
35    (0x8dd01fad907ffc3c, -980, -276),
36    (0xd3515c2831559a83, -954, -268),
37    (0x9d71ac8fada6c9b5, -927, -260),
38    (0xea9c227723ee8bcb, -901, -252),
39    (0xaecc49914078536d, -874, -244),
40    (0x823c12795db6ce57, -847, -236),
41    (0xc21094364dfb5637, -821, -228),
42    (0x9096ea6f3848984f, -794, -220),
43    (0xd77485cb25823ac7, -768, -212),
44    (0xa086cfcd97bf97f4, -741, -204),
45    (0xef340a98172aace5, -715, -196),
46    (0xb23867fb2a35b28e, -688, -188),
47    (0x84c8d4dfd2c63f3b, -661, -180),
48    (0xc5dd44271ad3cdba, -635, -172),
49    (0x936b9fcebb25c996, -608, -164),
50    (0xdbac6c247d62a584, -582, -156),
51    (0xa3ab66580d5fdaf6, -555, -148),
52    (0xf3e2f893dec3f126, -529, -140),
53    (0xb5b5ada8aaff80b8, -502, -132),
54    (0x87625f056c7c4a8b, -475, -124),
55    (0xc9bcff6034c13053, -449, -116),
56    (0x964e858c91ba2655, -422, -108),
57    (0xdff9772470297ebd, -396, -100),
58    (0xa6dfbd9fb8e5b88f, -369, -92),
59    (0xf8a95fcf88747d94, -343, -84),
60    (0xb94470938fa89bcf, -316, -76),
61    (0x8a08f0f8bf0f156b, -289, -68),
62    (0xcdb02555653131b6, -263, -60),
63    (0x993fe2c6d07b7fac, -236, -52),
64    (0xe45c10c42a2b3b06, -210, -44),
65    (0xaa242499697392d3, -183, -36),
66    (0xfd87b5f28300ca0e, -157, -28),
67    (0xbce5086492111aeb, -130, -20),
68    (0x8cbccc096f5088cc, -103, -12),
69    (0xd1b71758e219652c, -77, -4),
70    (0x9c40000000000000, -50, 4),
71    (0xe8d4a51000000000, -24, 12),
72    (0xad78ebc5ac620000, 3, 20),
73    (0x813f3978f8940984, 30, 28),
74    (0xc097ce7bc90715b3, 56, 36),
75    (0x8f7e32ce7bea5c70, 83, 44),
76    (0xd5d238a4abe98068, 109, 52),
77    (0x9f4f2726179a2245, 136, 60),
78    (0xed63a231d4c4fb27, 162, 68),
79    (0xb0de65388cc8ada8, 189, 76),
80    (0x83c7088e1aab65db, 216, 84),
81    (0xc45d1df942711d9a, 242, 92),
82    (0x924d692ca61be758, 269, 100),
83    (0xda01ee641a708dea, 295, 108),
84    (0xa26da3999aef774a, 322, 116),
85    (0xf209787bb47d6b85, 348, 124),
86    (0xb454e4a179dd1877, 375, 132),
87    (0x865b86925b9bc5c2, 402, 140),
88    (0xc83553c5c8965d3d, 428, 148),
89    (0x952ab45cfa97a0b3, 455, 156),
90    (0xde469fbd99a05fe3, 481, 164),
91    (0xa59bc234db398c25, 508, 172),
92    (0xf6c69a72a3989f5c, 534, 180),
93    (0xb7dcbf5354e9bece, 561, 188),
94    (0x88fcf317f22241e2, 588, 196),
95    (0xcc20ce9bd35c78a5, 614, 204),
96    (0x98165af37b2153df, 641, 212),
97    (0xe2a0b5dc971f303a, 667, 220),
98    (0xa8d9d1535ce3b396, 694, 228),
99    (0xfb9b7cd9a4a7443c, 720, 236),
100    (0xbb764c4ca7a44410, 747, 244),
101    (0x8bab8eefb6409c1a, 774, 252),
102    (0xd01fef10a657842c, 800, 260),
103    (0x9b10a4e5e9913129, 827, 268),
104    (0xe7109bfba19c0c9d, 853, 276),
105    (0xac2820d9623bf429, 880, 284),
106    (0x80444b5e7aa7cf85, 907, 292),
107    (0xbf21e44003acdd2d, 933, 300),
108    (0x8e679c2f5e44ff8f, 960, 308),
109    (0xd433179d9c8cb841, 986, 316),
110    (0x9e19db92b4e31ba9, 1013, 324),
111    (0xeb96bf6ebadf77d9, 1039, 332),
112];
113
114#[doc(hidden)]
115pub const CACHED_POW10_FIRST_E: i16 = -1087;
116#[doc(hidden)]
117pub const CACHED_POW10_LAST_E: i16 = 1039;
118
119#[doc(hidden)]
120pub fn cached_power(alpha: i16, gamma: i16) -> (i16, Fp) {
121    let offset = CACHED_POW10_FIRST_E as i32;
122    let range = (CACHED_POW10.len() as i32) - 1;
123    let domain = (CACHED_POW10_LAST_E - CACHED_POW10_FIRST_E) as i32;
124    let idx = ((gamma as i32) - offset) * range / domain;
125    let (f, e, k) = CACHED_POW10[idx as usize];
126    debug_assert!(alpha <= e && e <= gamma);
127    (k, Fp { f, e })
128}
129
130/// Given `x > 0`, returns `(k, 10^k)` such that `10^k <= x < 10^(k+1)`.
131#[doc(hidden)]
132pub fn max_pow10_no_more_than(x: u32) -> (u8, u32) {
133    debug_assert!(x > 0);
134
135    const X9: u32 = 10_0000_0000;
136    const X8: u32 = 1_0000_0000;
137    const X7: u32 = 1000_0000;
138    const X6: u32 = 100_0000;
139    const X5: u32 = 10_0000;
140    const X4: u32 = 1_0000;
141    const X3: u32 = 1000;
142    const X2: u32 = 100;
143    const X1: u32 = 10;
144
145    if x < X4 {
146        if x < X2 {
147            if x < X1 { (0, 1) } else { (1, X1) }
148        } else {
149            if x < X3 { (2, X2) } else { (3, X3) }
150        }
151    } else {
152        if x < X6 {
153            if x < X5 { (4, X4) } else { (5, X5) }
154        } else if x < X8 {
155            if x < X7 { (6, X6) } else { (7, X7) }
156        } else {
157            if x < X9 { (8, X8) } else { (9, X9) }
158        }
159    }
160}
161
162/// The shortest mode implementation for Grisu.
163///
164/// It returns `None` when it would return an inexact representation otherwise.
165pub fn format_shortest_opt<'a>(
166    d: &Decoded,
167    buf: &'a mut [MaybeUninit<u8>],
168) -> Option<(/*digits*/ &'a [u8], /*exp*/ i16)> {
169    assert!(d.mant > 0);
170    assert!(d.minus > 0);
171    assert!(d.plus > 0);
172    assert!(d.mant.checked_add(d.plus).is_some());
173    assert!(d.mant.checked_sub(d.minus).is_some());
174    assert!(buf.len() >= MAX_SIG_DIGITS);
175    assert!(d.mant + d.plus < (1 << 61)); // we need at least three bits of additional precision
176
177    // start with the normalized values with the shared exponent
178    let plus = Fp { f: d.mant + d.plus, e: d.exp }.normalize();
179    let minus = Fp { f: d.mant - d.minus, e: d.exp }.normalize_to(plus.e);
180    let v = Fp { f: d.mant, e: d.exp }.normalize_to(plus.e);
181
182    // find any `cached = 10^minusk` such that `ALPHA <= minusk + plus.e + 64 <= GAMMA`.
183    // since `plus` is normalized, this means `2^(62 + ALPHA) <= plus * cached < 2^(64 + GAMMA)`;
184    // given our choices of `ALPHA` and `GAMMA`, this puts `plus * cached` into `[4, 2^32)`.
185    //
186    // it is obviously desirable to maximize `GAMMA - ALPHA`,
187    // so that we don't need many cached powers of 10, but there are some considerations:
188    //
189    // 1. we want to keep `floor(plus * cached)` within `u32` since it needs a costly division.
190    //    (this is not really avoidable, remainder is required for accuracy estimation.)
191    // 2. the remainder of `floor(plus * cached)` repeatedly gets multiplied by 10,
192    //    and it should not overflow.
193    //
194    // the first gives `64 + GAMMA <= 32`, while the second gives `10 * 2^-ALPHA <= 2^64`;
195    // -60 and -32 is the maximal range with this constraint, and V8 also uses them.
196    let (minusk, cached) = cached_power(ALPHA - plus.e - 64, GAMMA - plus.e - 64);
197
198    // scale fps. this gives the maximal error of 1 ulp (proved from Theorem 5.1).
199    let plus = plus.mul(&cached);
200    let minus = minus.mul(&cached);
201    let v = v.mul(&cached);
202    debug_assert_eq!(plus.e, minus.e);
203    debug_assert_eq!(plus.e, v.e);
204
205    //         +- actual range of minus
206    //   | <---|---------------------- unsafe region --------------------------> |
207    //   |     |                                                                 |
208    //   |  |<--->|  | <--------------- safe region ---------------> |           |
209    //   |  |     |  |                                               |           |
210    //   |1 ulp|1 ulp|                 |1 ulp|1 ulp|                 |1 ulp|1 ulp|
211    //   |<--->|<--->|                 |<--->|<--->|                 |<--->|<--->|
212    //   |-----|-----|-------...-------|-----|-----|-------...-------|-----|-----|
213    //   |   minus   |                 |     v     |                 |   plus    |
214    // minus1     minus0           v - 1 ulp   v + 1 ulp           plus0       plus1
215    //
216    // above `minus`, `v` and `plus` are *quantized* approximations (error < 1 ulp).
217    // as we don't know the error is positive or negative, we use two approximations spaced equally
218    // and have the maximal error of 2 ulps.
219    //
220    // the "unsafe region" is a liberal interval which we initially generate.
221    // the "safe region" is a conservative interval which we only accept.
222    // we start with the correct repr within the unsafe region, and try to find the closest repr
223    // to `v` which is also within the safe region. if we can't, we give up.
224    let plus1 = plus.f + 1;
225    //  let plus0 = plus.f - 1; // only for explanation
226    //  let minus0 = minus.f + 1; // only for explanation
227    let minus1 = minus.f - 1;
228    let e = -plus.e as usize; // shared exponent
229
230    // divide `plus1` into integral and fractional parts.
231    // integral parts are guaranteed to fit in u32, since cached power guarantees `plus < 2^32`
232    // and normalized `plus.f` is always less than `2^64 - 2^4` due to the precision requirement.
233    let plus1int = (plus1 >> e) as u32;
234    let plus1frac = plus1 & ((1 << e) - 1);
235
236    // calculate the largest `10^max_kappa` no more than `plus1` (thus `plus1 < 10^(max_kappa+1)`).
237    // this is an upper bound of `kappa` below.
238    let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(plus1int);
239
240    let mut i = 0;
241    let exp = max_kappa as i16 - minusk + 1;
242
243    // Theorem 6.2: if `k` is the greatest integer s.t. `0 <= y mod 10^k <= y - x`,
244    //              then `V = floor(y / 10^k) * 10^k` is in `[x, y]` and one of the shortest
245    //              representations (with the minimal number of significant digits) in that range.
246    //
247    // find the digit length `kappa` between `(minus1, plus1)` as per Theorem 6.2.
248    // Theorem 6.2 can be adopted to exclude `x` by requiring `y mod 10^k < y - x` instead.
249    // (e.g., `x` = 32000, `y` = 32777; `kappa` = 2 since `y mod 10^3 = 777 < y - x = 777`.)
250    // the algorithm relies on the later verification phase to exclude `y`.
251    let delta1 = plus1 - minus1;
252    //  let delta1int = (delta1 >> e) as usize; // only for explanation
253    let delta1frac = delta1 & ((1 << e) - 1);
254
255    // render integral parts, while checking for the accuracy at each step.
256    let mut ten_kappa = max_ten_kappa; // 10^kappa
257    let mut remainder = plus1int; // digits yet to be rendered
258    loop {
259        // we always have at least one digit to render, as `plus1 >= 10^kappa`
260        // invariants:
261        // - `delta1int <= remainder < 10^(kappa+1)`
262        // - `plus1int = d[0..n-1] * 10^(kappa+1) + remainder`
263        //   (it follows that `remainder = plus1int % 10^(kappa+1)`)
264
265        // divide `remainder` by `10^kappa`. both are scaled by `2^-e`.
266        let q = remainder / ten_kappa;
267        let r = remainder % ten_kappa;
268        debug_assert!(q < 10);
269        buf[i] = MaybeUninit::new(b'0' + q as u8);
270        i += 1;
271
272        let plus1rem = ((r as u64) << e) + plus1frac; // == (plus1 % 10^kappa) * 2^e
273        if plus1rem < delta1 {
274            // `plus1 % 10^kappa < delta1 = plus1 - minus1`; we've found the correct `kappa`.
275            let ten_kappa = (ten_kappa as u64) << e; // scale 10^kappa back to the shared exponent
276            return round_and_weed(
277                // SAFETY: we initialized that memory above.
278                unsafe { buf[..i].assume_init_mut() },
279                exp,
280                plus1rem,
281                delta1,
282                plus1 - v.f,
283                ten_kappa,
284                1,
285            );
286        }
287
288        // break the loop when we have rendered all integral digits.
289        // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`.
290        if i > max_kappa as usize {
291            debug_assert_eq!(ten_kappa, 1);
292            break;
293        }
294
295        // restore invariants
296        ten_kappa /= 10;
297        remainder = r;
298    }
299
300    // render fractional parts, while checking for the accuracy at each step.
301    // this time we rely on repeated multiplications, as division will lose the precision.
302    let mut remainder = plus1frac;
303    let mut threshold = delta1frac;
304    let mut ulp = 1;
305    loop {
306        // the next digit should be significant as we've tested that before breaking out
307        // invariants, where `m = max_kappa + 1` (# of digits in the integral part):
308        // - `remainder < 2^e`
309        // - `plus1frac * 10^(n-m) = d[m..n-1] * 2^e + remainder`
310
311        remainder *= 10; // won't overflow, `2^e * 10 < 2^64`
312        threshold *= 10;
313        ulp *= 10;
314
315        // divide `remainder` by `10^kappa`.
316        // both are scaled by `2^e / 10^kappa`, so the latter is implicit here.
317        let q = remainder >> e;
318        let r = remainder & ((1 << e) - 1);
319        debug_assert!(q < 10);
320        buf[i] = MaybeUninit::new(b'0' + q as u8);
321        i += 1;
322
323        if r < threshold {
324            let ten_kappa = 1 << e; // implicit divisor
325            return round_and_weed(
326                // SAFETY: we initialized that memory above.
327                unsafe { buf[..i].assume_init_mut() },
328                exp,
329                r,
330                threshold,
331                (plus1 - v.f) * ulp,
332                ten_kappa,
333                ulp,
334            );
335        }
336
337        // restore invariants
338        remainder = r;
339    }
340
341    // we've generated all significant digits of `plus1`, but not sure if it's the optimal one.
342    // for example, if `minus1` is 3.14153... and `plus1` is 3.14158..., there are 5 different
343    // shortest representation from 3.14154 to 3.14158 but we only have the greatest one.
344    // we have to successively decrease the last digit and check if this is the optimal repr.
345    // there are at most 9 candidates (..1 to ..9), so this is fairly quick. ("rounding" phase)
346    //
347    // the function checks if this "optimal" repr is actually within the ulp ranges,
348    // and also, it is possible that the "second-to-optimal" repr can actually be optimal
349    // due to the rounding error. in either cases this returns `None`. ("weeding" phase)
350    //
351    // all arguments here are scaled by the common (but implicit) value `k`, so that:
352    // - `remainder = (plus1 % 10^kappa) * k`
353    // - `threshold = (plus1 - minus1) * k` (and also, `remainder < threshold`)
354    // - `plus1v = (plus1 - v) * k` (and also, `threshold > plus1v` from prior invariants)
355    // - `ten_kappa = 10^kappa * k`
356    // - `ulp = 2^-e * k`
357    fn round_and_weed(
358        buf: &mut [u8],
359        exp: i16,
360        remainder: u64,
361        threshold: u64,
362        plus1v: u64,
363        ten_kappa: u64,
364        ulp: u64,
365    ) -> Option<(&[u8], i16)> {
366        assert!(!buf.is_empty());
367
368        // produce two approximations to `v` (actually `plus1 - v`) within 1.5 ulps.
369        // the resulting representation should be the closest representation to both.
370        //
371        // here `plus1 - v` is used since calculations are done with respect to `plus1`
372        // in order to avoid overflow/underflow (hence the seemingly swapped names).
373        let plus1v_down = plus1v + ulp; // plus1 - (v - 1 ulp)
374        let plus1v_up = plus1v - ulp; // plus1 - (v + 1 ulp)
375
376        // decrease the last digit and stop at the closest representation to `v + 1 ulp`.
377        let mut plus1w = remainder; // plus1w(n) = plus1 - w(n)
378        {
379            let last = buf.last_mut().unwrap();
380
381            // we work with the approximated digits `w(n)`, which is initially equal to `plus1 -
382            // plus1 % 10^kappa`. after running the loop body `n` times, `w(n) = plus1 -
383            // plus1 % 10^kappa - n * 10^kappa`. we set `plus1w(n) = plus1 - w(n) =
384            // plus1 % 10^kappa + n * 10^kappa` (thus `remainder = plus1w(0)`) to simplify checks.
385            // note that `plus1w(n)` is always increasing.
386            //
387            // we have three conditions to terminate. any of them will make the loop unable to
388            // proceed, but we then have at least one valid representation known to be closest to
389            // `v + 1 ulp` anyway. we will denote them as TC1 through TC3 for brevity.
390            //
391            // TC1: `w(n) <= v + 1 ulp`, i.e., this is the last repr that can be the closest one.
392            // this is equivalent to `plus1 - w(n) = plus1w(n) >= plus1 - (v + 1 ulp) = plus1v_up`.
393            // combined with TC2 (which checks if `w(n+1)` is valid), this prevents the possible
394            // overflow on the calculation of `plus1w(n)`.
395            //
396            // TC2: `w(n+1) < minus1`, i.e., the next repr definitely does not round to `v`.
397            // this is equivalent to `plus1 - w(n) + 10^kappa = plus1w(n) + 10^kappa >
398            // plus1 - minus1 = threshold`. the left hand side can overflow, but we know
399            // `threshold > plus1v`, so if TC1 is false, `threshold - plus1w(n) >
400            // threshold - (plus1v - 1 ulp) > 1 ulp` and we can safely test if
401            // `threshold - plus1w(n) < 10^kappa` instead.
402            //
403            // TC3: `abs(w(n) - (v + 1 ulp)) <= abs(w(n+1) - (v + 1 ulp))`, i.e., the next repr is
404            // no closer to `v + 1 ulp` than the current repr. given `z(n) = plus1v_up - plus1w(n)`,
405            // this becomes `abs(z(n)) <= abs(z(n+1))`. again assuming that TC1 is false, we have
406            // `z(n) > 0`. we have two cases to consider:
407            //
408            // - when `z(n+1) >= 0`: TC3 becomes `z(n) <= z(n+1)`. as `plus1w(n)` is increasing,
409            //   `z(n)` should be decreasing and this is clearly false.
410            // - when `z(n+1) < 0`:
411            //   - TC3a: the precondition is `plus1v_up < plus1w(n) + 10^kappa`. assuming TC2 is
412            //     false, `threshold >= plus1w(n) + 10^kappa` so it cannot overflow.
413            //   - TC3b: TC3 becomes `z(n) <= -z(n+1)`, i.e., `plus1v_up - plus1w(n) >=
414            //     plus1w(n+1) - plus1v_up = plus1w(n) + 10^kappa - plus1v_up`. the negated TC1
415            //     gives `plus1v_up > plus1w(n)`, so it cannot overflow or underflow when
416            //     combined with TC3a.
417            //
418            // consequently, we should stop when `TC1 || TC2 || (TC3a && TC3b)`. the following is
419            // equal to its inverse, `!TC1 && !TC2 && (!TC3a || !TC3b)`.
420            while plus1w < plus1v_up
421                && threshold - plus1w >= ten_kappa
422                && (plus1w + ten_kappa < plus1v_up
423                    || plus1v_up - plus1w >= plus1w + ten_kappa - plus1v_up)
424            {
425                *last -= 1;
426                debug_assert!(*last > b'0'); // the shortest repr cannot end with `0`
427                plus1w += ten_kappa;
428            }
429        }
430
431        // check if this representation is also the closest representation to `v - 1 ulp`.
432        //
433        // this is simply same to the terminating conditions for `v + 1 ulp`, with all `plus1v_up`
434        // replaced by `plus1v_down` instead. overflow analysis equally holds.
435        if plus1w < plus1v_down
436            && threshold - plus1w >= ten_kappa
437            && (plus1w + ten_kappa < plus1v_down
438                || plus1v_down - plus1w >= plus1w + ten_kappa - plus1v_down)
439        {
440            return None;
441        }
442
443        // now we have the closest representation to `v` between `plus1` and `minus1`.
444        // this is too liberal, though, so we reject any `w(n)` not between `plus0` and `minus0`,
445        // i.e., `plus1 - plus1w(n) <= minus0` or `plus1 - plus1w(n) >= plus0`. we utilize the facts
446        // that `threshold = plus1 - minus1` and `plus1 - plus0 = minus0 - minus1 = 2 ulp`.
447        if 2 * ulp <= plus1w && plus1w <= threshold - 4 * ulp { Some((buf, exp)) } else { None }
448    }
449}
450
451/// The shortest mode implementation for Grisu with Dragon fallback.
452///
453/// This should be used for most cases.
454pub fn format_shortest<'a>(
455    d: &Decoded,
456    buf: &'a mut [MaybeUninit<u8>],
457) -> (/*digits*/ &'a [u8], /*exp*/ i16) {
458    use crate::num::flt2dec::strategy::dragon::format_shortest as fallback;
459    // SAFETY: The borrow checker is not smart enough to let us use `buf`
460    // in the second branch, so we launder the lifetime here. But we only re-use
461    // `buf` if `format_shortest_opt` returned `None` so this is okay.
462    match format_shortest_opt(d, unsafe { &mut *(buf as *mut _) }) {
463        Some(ret) => ret,
464        None => fallback(d, buf),
465    }
466}
467
468/// The exact and fixed mode implementation for Grisu.
469///
470/// It returns `None` when it would return an inexact representation otherwise.
471pub fn format_exact_opt<'a>(
472    d: &Decoded,
473    buf: &'a mut [MaybeUninit<u8>],
474    limit: i16,
475) -> Option<(/*digits*/ &'a [u8], /*exp*/ i16)> {
476    assert!(d.mant > 0);
477    assert!(d.mant < (1 << 61)); // we need at least three bits of additional precision
478    assert!(!buf.is_empty());
479
480    // normalize and scale `v`.
481    let v = Fp { f: d.mant, e: d.exp }.normalize();
482    let (minusk, cached) = cached_power(ALPHA - v.e - 64, GAMMA - v.e - 64);
483    let v = v.mul(&cached);
484
485    // divide `v` into integral and fractional parts.
486    let e = -v.e as usize;
487    let vint = (v.f >> e) as u32;
488    let vfrac = v.f & ((1 << e) - 1);
489
490    let requested_digits = buf.len();
491
492    const POW10_UP_TO_9: [u32; 10] =
493        [1, 10, 100, 1000, 10_000, 100_000, 1_000_000, 10_000_000, 100_000_000, 1_000_000_000];
494
495    // We deviate from the original algorithm here and do some early checks to determine if we can satisfy requested_digits.
496    // If we determine that we can't, we exit early and avoid most of the heavy lifting that the algorithm otherwise does.
497    //
498    // When vfrac is zero, we can easily determine if vint can satisfy requested digits:
499    //      If requested_digits >= 11, vint is not able to exhaust the count by itself since 10^(11 -1) > u32 max value >= vint.
500    //      If vint < 10^(requested_digits - 1), vint cannot exhaust the count.
501    //      Otherwise, vint might be able to exhaust the count and we need to execute the rest of the code.
502    if (vfrac == 0) && ((requested_digits >= 11) || (vint < POW10_UP_TO_9[requested_digits - 1])) {
503        return None;
504    }
505
506    // both old `v` and new `v` (scaled by `10^-k`) has an error of < 1 ulp (Theorem 5.1).
507    // as we don't know the error is positive or negative, we use two approximations
508    // spaced equally and have the maximal error of 2 ulps (same to the shortest case).
509    //
510    // the goal is to find the exactly rounded series of digits that are common to
511    // both `v - 1 ulp` and `v + 1 ulp`, so that we are maximally confident.
512    // if this is not possible, we don't know which one is the correct output for `v`,
513    // so we give up and fall back.
514    //
515    // `err` is defined as `1 ulp * 2^e` here (same to the ulp in `vfrac`),
516    // and we will scale it whenever `v` gets scaled.
517    let mut err = 1;
518
519    // calculate the largest `10^max_kappa` no more than `v` (thus `v < 10^(max_kappa+1)`).
520    // this is an upper bound of `kappa` below.
521    let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(vint);
522
523    let mut i = 0;
524    let exp = max_kappa as i16 - minusk + 1;
525
526    // if we are working with the last-digit limitation, we need to shorten the buffer
527    // before the actual rendering in order to avoid double rounding.
528    // note that we have to enlarge the buffer again when rounding up happens!
529    let len = if exp <= limit {
530        // oops, we cannot even produce *one* digit.
531        // this is possible when, say, we've got something like 9.5 and it's being rounded to 10.
532        //
533        // in principle we can immediately call `possibly_round` with an empty buffer,
534        // but scaling `max_ten_kappa << e` by 10 can result in overflow.
535        // thus we are being sloppy here and widen the error range by a factor of 10.
536        // this will increase the false negative rate, but only very, *very* slightly;
537        // it can only matter noticeably when the mantissa is bigger than 60 bits.
538        //
539        // SAFETY: `len=0`, so the obligation of having initialized this memory is trivial.
540        return unsafe {
541            possibly_round(buf, 0, exp, limit, v.f / 10, (max_ten_kappa as u64) << e, err << e)
542        };
543    } else if ((exp as i32 - limit as i32) as usize) < buf.len() {
544        (exp - limit) as usize
545    } else {
546        buf.len()
547    };
548    debug_assert!(len > 0);
549
550    // render integral parts.
551    // the error is entirely fractional, so we don't need to check it in this part.
552    let mut kappa = max_kappa as i16;
553    let mut ten_kappa = max_ten_kappa; // 10^kappa
554    let mut remainder = vint; // digits yet to be rendered
555    loop {
556        // we always have at least one digit to render
557        // invariants:
558        // - `remainder < 10^(kappa+1)`
559        // - `vint = d[0..n-1] * 10^(kappa+1) + remainder`
560        //   (it follows that `remainder = vint % 10^(kappa+1)`)
561
562        // divide `remainder` by `10^kappa`. both are scaled by `2^-e`.
563        let q = remainder / ten_kappa;
564        let r = remainder % ten_kappa;
565        debug_assert!(q < 10);
566        buf[i] = MaybeUninit::new(b'0' + q as u8);
567        i += 1;
568
569        // is the buffer full? run the rounding pass with the remainder.
570        if i == len {
571            let vrem = ((r as u64) << e) + vfrac; // == (v % 10^kappa) * 2^e
572            // SAFETY: we have initialized `len` many bytes.
573            return unsafe {
574                possibly_round(buf, len, exp, limit, vrem, (ten_kappa as u64) << e, err << e)
575            };
576        }
577
578        // break the loop when we have rendered all integral digits.
579        // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`.
580        if i > max_kappa as usize {
581            debug_assert_eq!(ten_kappa, 1);
582            debug_assert_eq!(kappa, 0);
583            break;
584        }
585
586        // restore invariants
587        kappa -= 1;
588        ten_kappa /= 10;
589        remainder = r;
590    }
591
592    // render fractional parts.
593    //
594    // in principle we can continue to the last available digit and check for the accuracy.
595    // unfortunately we are working with the finite-sized integers, so we need some criterion
596    // to detect the overflow. V8 uses `remainder > err`, which becomes false when
597    // the first `i` significant digits of `v - 1 ulp` and `v` differ. however this rejects
598    // too many otherwise valid input.
599    //
600    // since the later phase has a correct overflow detection, we instead use tighter criterion:
601    // we continue til `err` exceeds `10^kappa / 2`, so that the range between `v - 1 ulp` and
602    // `v + 1 ulp` definitely contains two or more rounded representations. this is same to
603    // the first two comparisons from `possibly_round`, for the reference.
604    let mut remainder = vfrac;
605    let maxerr = 1 << (e - 1);
606    while err < maxerr {
607        // invariants, where `m = max_kappa + 1` (# of digits in the integral part):
608        // - `remainder < 2^e`
609        // - `vfrac * 10^(n-m) = d[m..n-1] * 2^e + remainder`
610        // - `err = 10^(n-m)`
611
612        remainder *= 10; // won't overflow, `2^e * 10 < 2^64`
613        err *= 10; // won't overflow, `err * 10 < 2^e * 5 < 2^64`
614
615        // divide `remainder` by `10^kappa`.
616        // both are scaled by `2^e / 10^kappa`, so the latter is implicit here.
617        let q = remainder >> e;
618        let r = remainder & ((1 << e) - 1);
619        debug_assert!(q < 10);
620        buf[i] = MaybeUninit::new(b'0' + q as u8);
621        i += 1;
622
623        // is the buffer full? run the rounding pass with the remainder.
624        if i == len {
625            // SAFETY: we have initialized `len` many bytes.
626            return unsafe { possibly_round(buf, len, exp, limit, r, 1 << e, err) };
627        }
628
629        // restore invariants
630        remainder = r;
631    }
632
633    // further calculation is useless (`possibly_round` definitely fails), so we give up.
634    return None;
635
636    // we've generated all requested digits of `v`, which should be also same to corresponding
637    // digits of `v - 1 ulp`. now we check if there is a unique representation shared by
638    // both `v - 1 ulp` and `v + 1 ulp`; this can be either same to generated digits, or
639    // to the rounded-up version of those digits. if the range contains multiple representations
640    // of the same length, we cannot be sure and should return `None` instead.
641    //
642    // all arguments here are scaled by the common (but implicit) value `k`, so that:
643    // - `remainder = (v % 10^kappa) * k`
644    // - `ten_kappa = 10^kappa * k`
645    // - `ulp = 2^-e * k`
646    //
647    // SAFETY: the first `len` bytes of `buf` must be initialized.
648    unsafe fn possibly_round(
649        buf: &mut [MaybeUninit<u8>],
650        mut len: usize,
651        mut exp: i16,
652        limit: i16,
653        remainder: u64,
654        ten_kappa: u64,
655        ulp: u64,
656    ) -> Option<(&[u8], i16)> {
657        debug_assert!(remainder < ten_kappa);
658
659        //           10^kappa
660        //    :   :   :<->:   :
661        //    :   :   :   :   :
662        //    :|1 ulp|1 ulp|  :
663        //    :|<--->|<--->|  :
664        // ----|-----|-----|----
665        //     |     v     |
666        // v - 1 ulp   v + 1 ulp
667        //
668        // (for the reference, the dotted line indicates the exact value for
669        // possible representations in given number of digits.)
670        //
671        // error is too large that there are at least three possible representations
672        // between `v - 1 ulp` and `v + 1 ulp`. we cannot determine which one is correct.
673        if ulp >= ten_kappa {
674            return None;
675        }
676
677        //    10^kappa
678        //   :<------->:
679        //   :         :
680        //   : |1 ulp|1 ulp|
681        //   : |<--->|<--->|
682        // ----|-----|-----|----
683        //     |     v     |
684        // v - 1 ulp   v + 1 ulp
685        //
686        // in fact, 1/2 ulp is enough to introduce two possible representations.
687        // (remember that we need a unique representation for both `v - 1 ulp` and `v + 1 ulp`.)
688        // this won't overflow, as `ulp < ten_kappa` from the first check.
689        if ten_kappa - ulp <= ulp {
690            return None;
691        }
692
693        //     remainder
694        //       :<->|                           :
695        //       :   |                           :
696        //       :<--------- 10^kappa ---------->:
697        //     | :   |                           :
698        //     |1 ulp|1 ulp|                     :
699        //     |<--->|<--->|                     :
700        // ----|-----|-----|------------------------
701        //     |     v     |
702        // v - 1 ulp   v + 1 ulp
703        //
704        // if `v + 1 ulp` is closer to the rounded-down representation (which is already in `buf`),
705        // then we can safely return. note that `v - 1 ulp` *can* be less than the current
706        // representation, but as `1 ulp < 10^kappa / 2`, this condition is enough:
707        // the distance between `v - 1 ulp` and the current representation
708        // cannot exceed `10^kappa / 2`.
709        //
710        // the condition equals to `remainder + ulp < 10^kappa / 2`.
711        // since this can easily overflow, first check if `remainder < 10^kappa / 2`.
712        // we've already verified that `ulp < 10^kappa / 2`, so as long as
713        // `10^kappa` did not overflow after all, the second check is fine.
714        if ten_kappa - remainder > remainder && ten_kappa - 2 * remainder >= 2 * ulp {
715            // SAFETY: our caller initialized that memory.
716            return Some((unsafe { buf[..len].assume_init_ref() }, exp));
717        }
718
719        //   :<------- remainder ------>|   :
720        //   :                          |   :
721        //   :<--------- 10^kappa --------->:
722        //   :                    |     |   : |
723        //   :                    |1 ulp|1 ulp|
724        //   :                    |<--->|<--->|
725        // -----------------------|-----|-----|-----
726        //                        |     v     |
727        //                    v - 1 ulp   v + 1 ulp
728        //
729        // on the other hands, if `v - 1 ulp` is closer to the rounded-up representation,
730        // we should round up and return. for the same reason we don't need to check `v + 1 ulp`.
731        //
732        // the condition equals to `remainder - ulp >= 10^kappa / 2`.
733        // again we first check if `remainder > ulp` (note that this is not `remainder >= ulp`,
734        // as `10^kappa` is never zero). also note that `remainder - ulp <= 10^kappa`,
735        // so the second check does not overflow.
736        if remainder > ulp && ten_kappa - (remainder - ulp) <= remainder - ulp {
737            if let Some(c) =
738                // SAFETY: our caller must have initialized that memory.
739                round_up(unsafe { buf[..len].assume_init_mut() })
740            {
741                // only add an additional digit when we've been requested the fixed precision.
742                // we also need to check that, if the original buffer was empty,
743                // the additional digit can only be added when `exp == limit` (edge case).
744                exp += 1;
745                if exp > limit && len < buf.len() {
746                    buf[len] = MaybeUninit::new(c);
747                    len += 1;
748                }
749            }
750            // SAFETY: we and our caller initialized that memory.
751            return Some((unsafe { buf[..len].assume_init_ref() }, exp));
752        }
753
754        // otherwise we are doomed (i.e., some values between `v - 1 ulp` and `v + 1 ulp` are
755        // rounding down and others are rounding up) and give up.
756        None
757    }
758}
759
760/// The exact and fixed mode implementation for Grisu with Dragon fallback.
761///
762/// This should be used for most cases.
763pub fn format_exact<'a>(
764    d: &Decoded,
765    buf: &'a mut [MaybeUninit<u8>],
766    limit: i16,
767) -> (/*digits*/ &'a [u8], /*exp*/ i16) {
768    use crate::num::flt2dec::strategy::dragon::format_exact as fallback;
769    // SAFETY: The borrow checker is not smart enough to let us use `buf`
770    // in the second branch, so we launder the lifetime here. But we only re-use
771    // `buf` if `format_exact_opt` returned `None` so this is okay.
772    match format_exact_opt(d, unsafe { &mut *(buf as *mut _) }, limit) {
773        Some(ret) => ret,
774        None => fallback(d, buf, limit),
775    }
776}
core/num/flt2dec/strategy/grisu.rs

core/num/flt2dec/strategy/
grisu.rs