lexical_parse_float/
lemire.rs

1//! Implementation of the Eisel-Lemire algorithm.
2//!
3//! This is adapted from [fast-float-rust](https://github.com/aldanor/fast-float-rust),
4//! a port of [fast_float](https://github.com/fastfloat/fast_float) to Rust.
5
6#![cfg(not(feature = "compact"))]
7#![doc(hidden)]
8
9use crate::float::{ExtendedFloat80, LemireFloat};
10use crate::number::Number;
11use crate::shared;
12use crate::table::{LARGEST_POWER_OF_FIVE, POWER_OF_FIVE_128, SMALLEST_POWER_OF_FIVE};
13
14/// Ensure truncation of digits doesn't affect our computation, by doing 2
15/// passes.
16#[must_use]
17#[inline(always)]
18pub fn lemire<F: LemireFloat>(num: &Number, lossy: bool) -> ExtendedFloat80 {
19    // If significant digits were truncated, then we can have rounding error
20    // only if `mantissa + 1` produces a different result. We also avoid
21    // redundantly using the Eisel-Lemire algorithm if it was unable to
22    // correctly round on the first pass.
23    let mut fp = compute_float::<F>(num.exponent, num.mantissa, lossy);
24    if !lossy
25        && num.many_digits
26        && fp.exp >= 0
27        && fp != compute_float::<F>(num.exponent, num.mantissa + 1, false)
28    {
29        // Need to re-calculate, since the previous values are rounded
30        // when the slow path algorithm expects a normalized extended float.
31        fp = compute_error::<F>(num.exponent, num.mantissa);
32    }
33    fp
34}
35
36/// Compute a float using an extended-precision representation.
37///
38/// Fast conversion of a the significant digits and decimal exponent
39/// a float to a extended representation with a binary float. This
40/// algorithm will accurately parse the vast majority of cases,
41/// and uses a 128-bit representation (with a fallback 192-bit
42/// representation).
43///
44/// This algorithm scales the exponent by the decimal exponent
45/// using pre-computed powers-of-5, and calculates if the
46/// representation can be unambiguously rounded to the nearest
47/// machine float. Near-halfway cases are not handled here,
48/// and are represented by a negative, biased binary exponent.
49///
50/// The algorithm is described in detail in "Daniel Lemire, Number Parsing
51/// at a Gigabyte per Second" in section 5, "Fast Algorithm", and
52/// section 6, "Exact Numbers And Ties", available online:
53/// <https://arxiv.org/abs/2101.11408.pdf>.
54#[must_use]
55#[allow(clippy::missing_inline_in_public_items)] // reason="public for testing only"
56pub fn compute_float<F: LemireFloat>(q: i64, mut w: u64, lossy: bool) -> ExtendedFloat80 {
57    let fp_zero = ExtendedFloat80 {
58        mant: 0,
59        exp: 0,
60    };
61    let fp_inf = ExtendedFloat80 {
62        mant: 0,
63        exp: F::INFINITE_POWER,
64    };
65
66    // Short-circuit if the value can only be a literal 0 or infinity.
67    if w == 0 || q < F::SMALLEST_POWER_OF_TEN as i64 {
68        return fp_zero;
69    } else if q > F::LARGEST_POWER_OF_TEN as i64 {
70        return fp_inf;
71    }
72    // Normalize our significant digits, so the most-significant bit is set.
73    let lz = w.leading_zeros() as i32;
74    w <<= lz;
75    let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_SIZE as usize + 3);
76    if !lossy && lo == 0xFFFF_FFFF_FFFF_FFFF {
77        // If we have failed to approximate `w x 5^-q` with our 128-bit value.
78        // Since the addition of 1 could lead to an overflow which could then
79        // round up over the half-way point, this can lead to improper rounding
80        // of a float.
81        //
82        // However, this can only occur if `q ∈ [-27, 55]`. The upper bound of q
83        // is 55 because `5^55 < 2^128`, however, this can only happen if `5^q > 2^64`,
84        // since otherwise the product can be represented in 64-bits, producing
85        // an exact result. For negative exponents, rounding-to-even can
86        // only occur if `5^-q < 2^64`.
87        //
88        // For detailed explanations of rounding for negative exponents, see
89        // <https://arxiv.org/pdf/2101.11408.pdf#section.9.1>. For detailed
90        // explanations of rounding for positive exponents, see
91        // <https://arxiv.org/pdf/2101.11408.pdf#section.8>.
92        let inside_safe_exponent = (-27..=55).contains(&q);
93        if !inside_safe_exponent {
94            return compute_error_scaled::<F>(q, hi, lz);
95        }
96    }
97    let upperbit = (hi >> 63) as i32;
98    let mut mantissa = hi >> (upperbit + 64 - F::MANTISSA_SIZE - 3);
99    let mut power2 = power(q as i32) + upperbit - lz - F::MINIMUM_EXPONENT;
100    if power2 <= 0 {
101        if -power2 + 1 >= 64 {
102            // Have more than 64 bits below the minimum exponent, must be 0.
103            return fp_zero;
104        }
105        // Have a subnormal value.
106        mantissa >>= -power2 + 1;
107        mantissa += mantissa & 1;
108        mantissa >>= 1;
109        power2 = (mantissa >= (1_u64 << F::MANTISSA_SIZE)) as i32;
110        return ExtendedFloat80 {
111            mant: mantissa,
112            exp: power2,
113        };
114    }
115    // Need to handle rounding ties. Normally, we need to round up,
116    // but if we fall right in between and and we have an even basis, we
117    // need to round down.
118    //
119    // This will only occur if:
120    //  1. The lower 64 bits of the 128-bit representation is 0. IE, `5^q` fits in
121    //     single 64-bit word.
122    //  2. The least-significant bit prior to truncated mantissa is odd.
123    //  3. All the bits truncated when shifting to mantissa bits + 1 are 0.
124    //
125    // Or, we may fall between two floats: we are exactly halfway.
126    if lo <= 1
127        && q >= F::MIN_EXPONENT_ROUND_TO_EVEN as i64
128        && q <= F::MAX_EXPONENT_ROUND_TO_EVEN as i64
129        && mantissa & 3 == 1
130        && (mantissa << (upperbit + 64 - F::MANTISSA_SIZE - 3)) == hi
131    {
132        // Zero the lowest bit, so we don't round up.
133        mantissa &= !1_u64;
134    }
135    // Round-to-even, then shift the significant digits into place.
136    mantissa += mantissa & 1;
137    mantissa >>= 1;
138    if mantissa >= (2_u64 << F::MANTISSA_SIZE) {
139        // Rounding up overflowed, so the carry bit is set. Set the
140        // mantissa to 1 (only the implicit, hidden bit is set) and
141        // increase the exponent.
142        mantissa = 1_u64 << F::MANTISSA_SIZE;
143        power2 += 1;
144    }
145    // Zero out the hidden bit.
146    mantissa &= !(1_u64 << F::MANTISSA_SIZE);
147    if power2 >= F::INFINITE_POWER {
148        // Exponent is above largest normal value, must be infinite.
149        return fp_inf;
150    }
151    ExtendedFloat80 {
152        mant: mantissa,
153        exp: power2,
154    }
155}
156
157/// Fallback algorithm to calculate the non-rounded representation.
158/// This calculates the extended representation, and then normalizes
159/// the resulting representation, so the high bit is set.
160#[must_use]
161#[inline(always)]
162pub fn compute_error<F: LemireFloat>(q: i64, mut w: u64) -> ExtendedFloat80 {
163    let lz = w.leading_zeros() as i32;
164    w <<= lz;
165    let hi = compute_product_approx(q, w, F::MANTISSA_SIZE as usize + 3).1;
166    compute_error_scaled::<F>(q, hi, lz)
167}
168
169/// Compute the error from a mantissa scaled to the exponent.
170#[must_use]
171#[inline(always)]
172pub const fn compute_error_scaled<F: LemireFloat>(q: i64, mut w: u64, lz: i32) -> ExtendedFloat80 {
173    // Want to normalize the float, but this is faster than ctlz on most
174    // architectures.
175    let hilz = (w >> 63) as i32 ^ 1;
176    w <<= hilz;
177    let power2 = power(q as i32) + F::EXPONENT_BIAS - hilz - lz - 62;
178
179    ExtendedFloat80 {
180        mant: w,
181        exp: power2 + shared::INVALID_FP,
182    }
183}
184
185/// Calculate a base 2 exponent from a decimal exponent.
186/// This uses a pre-computed integer approximation for
187/// log2(10), where 217706 / 2^16 is accurate for the
188/// entire range of non-finite decimal exponents.
189#[inline(always)]
190const fn power(q: i32) -> i32 {
191    (q.wrapping_mul(152_170 + 65536) >> 16) + 63
192}
193
194#[inline(always)]
195const fn full_multiplication(a: u64, b: u64) -> (u64, u64) {
196    let r = (a as u128) * (b as u128);
197    (r as u64, (r >> 64) as u64)
198}
199
200// This will compute or rather approximate `w * 5**q` and return a pair of
201// 64-bit words approximating the result, with the "high" part corresponding to
202// the most significant bits and the low part corresponding to the least
203// significant bits.
204fn compute_product_approx(q: i64, w: u64, precision: usize) -> (u64, u64) {
205    debug_assert!(q >= SMALLEST_POWER_OF_FIVE as i64, "must be within our required pow5 range");
206    debug_assert!(q <= LARGEST_POWER_OF_FIVE as i64, "must be within our required pow5 range");
207    debug_assert!(precision <= 64, "requires a 64-bit or smaller float");
208
209    let mask = if precision < 64 {
210        0xFFFF_FFFF_FFFF_FFFF_u64 >> precision
211    } else {
212        0xFFFF_FFFF_FFFF_FFFF_u64
213    };
214
215    // `5^q < 2^64`, then the multiplication always provides an exact value.
216    // That means whenever we need to round ties to even, we always have
217    // an exact value.
218    let index = (q - SMALLEST_POWER_OF_FIVE as i64) as usize;
219    let (lo5, hi5) = POWER_OF_FIVE_128[index];
220    // Only need one multiplication as long as there is 1 zero but
221    // in the explicit mantissa bits, +1 for the hidden bit, +1 to
222    // determine the rounding direction, +1 for if the computed
223    // product has a leading zero.
224    let (mut first_lo, mut first_hi) = full_multiplication(w, lo5);
225    if first_hi & mask == mask {
226        // Need to do a second multiplication to get better precision
227        // for the lower product. This will always be exact
228        // where q is < 55, since 5^55 < 2^128. If this wraps,
229        // then we need to need to round up the hi product.
230        let (_, second_hi) = full_multiplication(w, hi5);
231        first_lo = first_lo.wrapping_add(second_hi);
232        if second_hi > first_lo {
233            first_hi += 1;
234        }
235    }
236    (first_lo, first_hi)
237}