core/num/dec2flt/
parse.rs

Help
1//! Functions to parse floating-point numbers.
2
3use crate::num::dec2flt::common::{ByteSlice, is_8digits};
4use crate::num::dec2flt::decimal::Decimal;
5use crate::num::dec2flt::float::RawFloat;
6
7const MIN_19DIGIT_INT: u64 = 100_0000_0000_0000_0000;
8
9/// Parse 8 digits, loaded as bytes in little-endian order.
10///
11/// This uses the trick where every digit is in [0x030, 0x39],
12/// and therefore can be parsed in 3 multiplications, much
13/// faster than the normal 8.
14///
15/// This is based off the algorithm described in "Fast numeric string to
16/// int", available here: <https://johnnylee-sde.github.io/Fast-numeric-string-to-int/>.
17fn parse_8digits(mut v: u64) -> u64 {
18    const MASK: u64 = 0x0000_00FF_0000_00FF;
19    const MUL1: u64 = 0x000F_4240_0000_0064;
20    const MUL2: u64 = 0x0000_2710_0000_0001;
21    v -= 0x3030_3030_3030_3030;
22    v = (v * 10) + (v >> 8); // will not overflow, fits in 63 bits
23    let v1 = (v & MASK).wrapping_mul(MUL1);
24    let v2 = ((v >> 16) & MASK).wrapping_mul(MUL2);
25    ((v1.wrapping_add(v2) >> 32) as u32) as u64
26}
27
28/// Parse digits until a non-digit character is found.
29fn try_parse_digits(mut s: &[u8], mut x: u64) -> (&[u8], u64) {
30    // may cause overflows, to be handled later
31
32    while s.len() >= 8 {
33        let num = s.read_u64();
34        if is_8digits(num) {
35            x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(num));
36            s = &s[8..];
37        } else {
38            break;
39        }
40    }
41
42    s = s.parse_digits(|digit| {
43        x = x.wrapping_mul(10).wrapping_add(digit as _);
44    });
45
46    (s, x)
47}
48
49/// Parse up to 19 digits (the max that can be stored in a 64-bit integer).
50fn try_parse_19digits(s_ref: &mut &[u8], x: &mut u64) {
51    let mut s = *s_ref;
52
53    while *x < MIN_19DIGIT_INT {
54        if let Some((c, s_next)) = s.split_first() {
55            let digit = c.wrapping_sub(b'0');
56
57            if digit < 10 {
58                *x = (*x * 10) + digit as u64; // no overflows here
59                s = s_next;
60            } else {
61                break;
62            }
63        } else {
64            break;
65        }
66    }
67
68    *s_ref = s;
69}
70
71/// Parse the scientific notation component of a float.
72fn parse_scientific(s_ref: &mut &[u8]) -> Option<i64> {
73    let mut exponent = 0i64;
74    let mut negative = false;
75
76    let mut s = *s_ref;
77
78    if let Some((&c, s_next)) = s.split_first() {
79        negative = c == b'-';
80        if c == b'-' || c == b'+' {
81            s = s_next;
82        }
83    }
84
85    if matches!(s.first(), Some(&x) if x.is_ascii_digit()) {
86        *s_ref = s.parse_digits(|digit| {
87            // no overflows here, saturate well before overflow
88            if exponent < 0x10000 {
89                exponent = 10 * exponent + digit as i64;
90            }
91        });
92        if negative { Some(-exponent) } else { Some(exponent) }
93    } else {
94        *s_ref = s;
95        None
96    }
97}
98
99/// Parse a partial, non-special floating point number.
100///
101/// This creates a representation of the float as the
102/// significant digits and the decimal exponent.
103fn parse_partial_number(mut s: &[u8]) -> Option<(Decimal, usize)> {
104    debug_assert!(!s.is_empty());
105
106    // parse initial digits before dot
107    let mut mantissa = 0_u64;
108    let start = s;
109    let tmp = try_parse_digits(s, mantissa);
110    s = tmp.0;
111    mantissa = tmp.1;
112    let mut n_digits = s.offset_from(start);
113
114    // handle dot with the following digits
115    let mut n_after_dot = 0;
116    let mut exponent = 0_i64;
117    let int_end = s;
118
119    if let Some((&b'.', s_next)) = s.split_first() {
120        s = s_next;
121        let before = s;
122        let tmp = try_parse_digits(s, mantissa);
123        s = tmp.0;
124        mantissa = tmp.1;
125        n_after_dot = s.offset_from(before);
126        exponent = -n_after_dot as i64;
127    }
128
129    n_digits += n_after_dot;
130    if n_digits == 0 {
131        return None;
132    }
133
134    // handle scientific format
135    let mut exp_number = 0_i64;
136    if let Some((&c, s_next)) = s.split_first() {
137        if c == b'e' || c == b'E' {
138            s = s_next;
139            // If None, we have no trailing digits after exponent, or an invalid float.
140            exp_number = parse_scientific(&mut s)?;
141            exponent += exp_number;
142        }
143    }
144
145    let len = s.offset_from(start) as _;
146
147    // handle uncommon case with many digits
148    if n_digits <= 19 {
149        return Some((Decimal { exponent, mantissa, negative: false, many_digits: false }, len));
150    }
151
152    n_digits -= 19;
153    let mut many_digits = false;
154    let mut p = start;
155    while let Some((&c, p_next)) = p.split_first() {
156        if c == b'.' || c == b'0' {
157            n_digits -= c.saturating_sub(b'0' - 1) as isize;
158            p = p_next;
159        } else {
160            break;
161        }
162    }
163    if n_digits > 0 {
164        // at this point we have more than 19 significant digits, let's try again
165        many_digits = true;
166        mantissa = 0;
167        let mut s = start;
168        try_parse_19digits(&mut s, &mut mantissa);
169        exponent = if mantissa >= MIN_19DIGIT_INT {
170            // big int
171            int_end.offset_from(s)
172        } else {
173            s = &s[1..];
174            let before = s;
175            try_parse_19digits(&mut s, &mut mantissa);
176            -s.offset_from(before)
177        } as i64;
178        // add back the explicit part
179        exponent += exp_number;
180    }
181
182    Some((Decimal { exponent, mantissa, negative: false, many_digits }, len))
183}
184
185/// Try to parse a non-special floating point number,
186/// as well as two slices with integer and fractional parts
187/// and the parsed exponent.
188pub fn parse_number(s: &[u8]) -> Option<Decimal> {
189    if let Some((float, rest)) = parse_partial_number(s) {
190        if rest == s.len() {
191            return Some(float);
192        }
193    }
194    None
195}
196
197/// Try to parse a special, non-finite float.
198pub(crate) fn parse_inf_nan<F: RawFloat>(s: &[u8], negative: bool) -> Option<F> {
199    // Since a valid string has at most the length 8, we can load
200    // all relevant characters into a u64 and work from there.
201    // This also generates much better code.
202
203    let mut register;
204    let len: usize;
205
206    // All valid strings are either of length 8 or 3.
207    if s.len() == 8 {
208        register = s.read_u64();
209        len = 8;
210    } else if s.len() == 3 {
211        let a = s[0] as u64;
212        let b = s[1] as u64;
213        let c = s[2] as u64;
214        register = (c << 16) | (b << 8) | a;
215        len = 3;
216    } else {
217        return None;
218    }
219
220    // Clear out the bits which turn ASCII uppercase characters into
221    // lowercase characters. The resulting string is all uppercase.
222    // What happens to other characters is irrelevant.
223    register &= 0xDFDFDFDFDFDFDFDF;
224
225    // u64 values corresponding to relevant cases
226    const INF_3: u64 = 0x464E49; // "INF"
227    const INF_8: u64 = 0x5954494E49464E49; // "INFINITY"
228    const NAN: u64 = 0x4E414E; // "NAN"
229
230    // Match register value to constant to parse string.
231    // Also match on the string length to catch edge cases
232    // like "inf\0\0\0\0\0".
233    let float = match (register, len) {
234        (INF_3, 3) => F::INFINITY,
235        (INF_8, 8) => F::INFINITY,
236        (NAN, 3) => F::NAN,
237        _ => return None,
238    };
239
240    if negative { Some(-float) } else { Some(float) }
241}
core/num/dec2flt/parse.rs

core/num/dec2flt/
parse.rs