serde_json/lexical/
errors.rs

1// Adapted from https://github.com/Alexhuszagh/rust-lexical.
2
3//! Estimate the error in an 80-bit approximation of a float.
4//!
5//! This estimates the error in a floating-point representation.
6//!
7//! This implementation is loosely based off the Golang implementation,
8//! found here: <https://golang.org/src/strconv/atof.go>
9
10use super::float::*;
11use super::num::*;
12use super::rounding::*;
13
14pub(crate) trait FloatErrors {
15    /// Get the full error scale.
16    fn error_scale() -> u32;
17    /// Get the half error scale.
18    fn error_halfscale() -> u32;
19    /// Determine if the number of errors is tolerable for float precision.
20    fn error_is_accurate<F: Float>(count: u32, fp: &ExtendedFloat) -> bool;
21}
22
23/// Check if the error is accurate with a round-nearest rounding scheme.
24#[inline]
25fn nearest_error_is_accurate(errors: u64, fp: &ExtendedFloat, extrabits: u64) -> bool {
26    // Round-to-nearest, need to use the halfway point.
27    if extrabits == 65 {
28        // Underflow, we have a shift larger than the mantissa.
29        // Representation is valid **only** if the value is close enough
30        // overflow to the next bit within errors. If it overflows,
31        // the representation is **not** valid.
32        !fp.mant.overflowing_add(errors).1
33    } else {
34        let mask: u64 = lower_n_mask(extrabits);
35        let extra: u64 = fp.mant & mask;
36
37        // Round-to-nearest, need to check if we're close to halfway.
38        // IE, b10100 | 100000, where `|` signifies the truncation point.
39        let halfway: u64 = lower_n_halfway(extrabits);
40        let cmp1 = halfway.wrapping_sub(errors) < extra;
41        let cmp2 = extra < halfway.wrapping_add(errors);
42
43        // If both comparisons are true, we have significant rounding error,
44        // and the value cannot be exactly represented. Otherwise, the
45        // representation is valid.
46        !(cmp1 && cmp2)
47    }
48}
49
50impl FloatErrors for u64 {
51    #[inline]
52    fn error_scale() -> u32 {
53        8
54    }
55
56    #[inline]
57    fn error_halfscale() -> u32 {
58        u64::error_scale() / 2
59    }
60
61    #[inline]
62    fn error_is_accurate<F: Float>(count: u32, fp: &ExtendedFloat) -> bool {
63        // Determine if extended-precision float is a good approximation.
64        // If the error has affected too many units, the float will be
65        // inaccurate, or if the representation is too close to halfway
66        // that any operations could affect this halfway representation.
67        // See the documentation for dtoa for more information.
68        let bias = -(F::EXPONENT_BIAS - F::MANTISSA_SIZE);
69        let denormal_exp = bias - 63;
70        // This is always a valid u32, since (denormal_exp - fp.exp)
71        // will always be positive and the significand size is {23, 52}.
72        let extrabits = if fp.exp <= denormal_exp {
73            64 - F::MANTISSA_SIZE + denormal_exp - fp.exp
74        } else {
75            63 - F::MANTISSA_SIZE
76        };
77
78        // Our logic is as follows: we want to determine if the actual
79        // mantissa and the errors during calculation differ significantly
80        // from the rounding point. The rounding point for round-nearest
81        // is the halfway point, IE, this when the truncated bits start
82        // with b1000..., while the rounding point for the round-toward
83        // is when the truncated bits are equal to 0.
84        // To do so, we can check whether the rounding point +/- the error
85        // are >/< the actual lower n bits.
86        //
87        // For whether we need to use signed or unsigned types for this
88        // analysis, see this example, using u8 rather than u64 to simplify
89        // things.
90        //
91        // # Comparisons
92        //      cmp1 = (halfway - errors) < extra
93        //      cmp1 = extra < (halfway + errors)
94        //
95        // # Large Extrabits, Low Errors
96        //
97        //      extrabits = 8
98        //      halfway          =  0b10000000
99        //      extra            =  0b10000010
100        //      errors           =  0b00000100
101        //      halfway - errors =  0b01111100
102        //      halfway + errors =  0b10000100
103        //
104        //      Unsigned:
105        //          halfway - errors = 124
106        //          halfway + errors = 132
107        //          extra            = 130
108        //          cmp1             = true
109        //          cmp2             = true
110        //      Signed:
111        //          halfway - errors = 124
112        //          halfway + errors = -124
113        //          extra            = -126
114        //          cmp1             = false
115        //          cmp2             = true
116        //
117        // # Conclusion
118        //
119        // Since errors will always be small, and since we want to detect
120        // if the representation is accurate, we need to use an **unsigned**
121        // type for comparisons.
122
123        let extrabits = extrabits as u64;
124        let errors = count as u64;
125        if extrabits > 65 {
126            // Underflow, we have a literal 0.
127            return true;
128        }
129
130        nearest_error_is_accurate(errors, fp, extrabits)
131    }
132}