serde_json/lexical/float.rs

// Adapted from https://github.com/Alexhuszagh/rust-lexical.

// FLOAT TYPE

use super::num::*;
use super::rounding::*;
use super::shift::*;

/// Extended precision floating-point type.
///
/// Private implementation, exposed only for testing purposes.
#[doc(hidden)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct ExtendedFloat {
    /// Mantissa for the extended-precision float.
    pub mant: u64,
    /// Binary exponent for the extended-precision float.
    pub exp: i32,
}
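
// Illustrative sketch (not part of the adapted source): an `ExtendedFloat`
// represents the unsigned value `mant * 2^exp`, with no implicit hidden bit.
// For example, 1.5 == 3 * 2^-1 can be written as:
//
//     let x = ExtendedFloat { mant: 3, exp: -1 };
//
// and its normalized form shifts the highest set bit up to bit 63,
// compensating in the exponent:
//
//     let y = ExtendedFloat { mant: 3 << 62, exp: -63 };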

impl ExtendedFloat {
    // PROPERTIES

    // OPERATIONS

    /// Multiply two normalized extended-precision floats, as if by `a*b`.
    ///
    /// The precision is maximal when the numbers are normalized; however,
    /// decent precision will occur as long as both values have their high
    /// bits set. The result is not normalized.
    ///
    /// Algorithm:
    ///     1. Unsigned multiplication of mantissas (requires 2x as many bits as the input).
    ///     2. Normalization of the result (not done here).
    ///     3. Addition of exponents.
    pub(crate) fn mul(&self, b: &ExtendedFloat) -> ExtendedFloat {
        // Logic check: values must be decently normalized prior to multiplication.
        debug_assert!((self.mant & u64::HIMASK != 0) && (b.mant & u64::HIMASK != 0));

        // Extract the high and low halves of each mantissa.
        let ah = self.mant >> u64::HALF;
        let al = self.mant & u64::LOMASK;
        let bh = b.mant >> u64::HALF;
        let bl = b.mant & u64::LOMASK;

        // Compute the four partial products.
        let ah_bl = ah * bl;
        let al_bh = al * bh;
        let al_bl = al * bl;
        let ah_bh = ah * bh;

        let mut tmp = (ah_bl & u64::LOMASK) + (al_bh & u64::LOMASK) + (al_bl >> u64::HALF);
        // Round up.
        tmp += 1 << (u64::HALF - 1);

        ExtendedFloat {
            mant: ah_bh + (ah_bl >> u64::HALF) + (al_bh >> u64::HALF) + (tmp >> u64::HALF),
            exp: self.exp + b.exp + u64::FULL,
        }
    }
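
    // Hedged aside (not in the adapted source): on targets with `u128`, the
    // half-word arithmetic above is equivalent to taking the high 64 bits of
    // the full 128-bit product, rounded by adding a 1 at bit 63 first:
    //
    //     let wide = u128::from(self.mant) * u128::from(b.mant);
    //     let mant = ((wide + (1u128 << 63)) >> 64) as u64;
    //     let exp = self.exp + b.exp + 64;
    //
    // For example, squaring 1.0 in normalized form:
    //
    //     let x = ExtendedFloat { mant: 1 << 63, exp: -63 };
    //     assert_eq!(x.mul(&x), ExtendedFloat { mant: 1 << 62, exp: -62 });
    //     // Note the result is not normalized: the top bit is at bit 62.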

    /// Multiply in-place, as if by `a*b`.
    ///
    /// The result is not normalized.
    #[inline]
    pub(crate) fn imul(&mut self, b: &ExtendedFloat) {
        *self = self.mul(b);
    }

    // NORMALIZE

    /// Normalize floating-point number.
    ///
    /// Shift the mantissa so the number of leading zeros is 0, or the value
    /// itself is 0.
    ///
    /// Get the number of bits shifted.
    #[inline]
    pub(crate) fn normalize(&mut self) -> u32 {
        // Note:
        // Using the ctlz intrinsic via leading_zeros is way faster (~10x)
        // than shifting 1 bit at a time via a while loop, and also way
        // faster (~2x) than an unrolled loop that checks at 32, 16, 8,
        // 4, 2, and 1 bit.
        //
        // Using a modulus of pow2 (which will get optimized to a bitwise
        // and with 0x3F or faster) is slightly slower than an if/then;
        // however, removing the if/then will likely optimize more branched
        // code, as it removes conditional logic.

        // Calculate the number of leading zeros, and then zero-out
        // any overflowing bits, to avoid shl overflow when self.mant == 0.
        let shift = if self.mant == 0 {
            0
        } else {
            self.mant.leading_zeros()
        };
        shl(self, shift as i32);
        shift
    }
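
    // Hedged example (not in the adapted source), assuming `shl` from
    // `super::shift` shifts the mantissa left and subtracts the shift from
    // the exponent, so the value is unchanged:
    //
    //     let mut x = ExtendedFloat { mant: 3, exp: -1 }; // 1.5
    //     assert_eq!(x.normalize(), 62); // 3 has 62 leading zeros
    //     assert_eq!(x, ExtendedFloat { mant: 3 << 62, exp: -63 }); // still 1.5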

    // ROUND

    /// Lossily round the floating-point number to native mantissa boundaries.
    #[inline]
    pub(crate) fn round_to_native<F, Algorithm>(&mut self, algorithm: Algorithm)
    where
        F: Float,
        Algorithm: FnOnce(&mut ExtendedFloat, i32),
    {
        round_to_native::<F, _>(self, algorithm);
    }

    // FROM

    /// Create extended float from native float.
    #[inline]
    pub(crate) fn from_float<F: Float>(f: F) -> ExtendedFloat {
        from_float(f)
    }

    // INTO

    /// Convert into default-rounded, lower-precision native float.
    #[inline]
    pub(crate) fn into_float<F: Float>(mut self) -> F {
        self.round_to_native::<F, _>(round_nearest_tie_even);
        into_float(self)
    }

    /// Convert into downward-rounded, lower-precision native float.
    #[inline]
    pub(crate) fn into_downward_float<F: Float>(mut self) -> F {
        self.round_to_native::<F, _>(round_downward);
        into_float(self)
    }
}
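
// Hedged usage sketch (not in the adapted source): the two `into_*`
// conversions differ only in the rounding callback passed to
// `round_to_native`. For a value just above the midpoint of 1.0 and the
// next f64, nearest-tie-even rounds up while downward truncates:
//
//     // 1 + 2^-53 + 2^-63, assuming the rounding helpers behave as in
//     // rust-lexical.
//     let fp = ExtendedFloat { mant: (1 << 63) | (1 << 10) | 1, exp: -63 };
//     assert_eq!(fp.into_float::<f64>(), 1.0 + f64::EPSILON);
//     assert_eq!(fp.into_downward_float::<f64>(), 1.0);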

// FROM FLOAT

// Create an ExtendedFloat from a native float.
#[inline]
pub(crate) fn from_float<F>(f: F) -> ExtendedFloat
where
    F: Float,
{
    ExtendedFloat {
        mant: u64::as_cast(f.mantissa()),
        exp: f.exponent(),
    }
}
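
// Hedged example (not in the adapted source), assuming `F::mantissa()`
// returns the significand with the hidden bit restored and `F::exponent()`
// the unbiased exponent adjusted by the mantissa width, as in rust-lexical:
//
//     // 1.0f64 decomposes as 2^52 * 2^-52.
//     assert_eq!(from_float(1.0f64), ExtendedFloat { mant: 1 << 52, exp: -52 });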

// INTO FLOAT

// Export an extended-precision float to a native float.
//
// The extended-precision float must be in native float representation,
// with overflow/underflow appropriately handled.
#[inline]
pub(crate) fn into_float<F>(fp: ExtendedFloat) -> F
where
    F: Float,
{
    // Export floating-point number.
    if fp.mant == 0 || fp.exp < F::DENORMAL_EXPONENT {
        // sub-denormal, underflow
        F::ZERO
    } else if fp.exp >= F::MAX_EXPONENT {
        // overflow
        F::from_bits(F::INFINITY_BITS)
    } else {
        // Calculate the exp and fraction bits, and return a float from bits.
        let exp: u64;
        if (fp.exp == F::DENORMAL_EXPONENT) && (fp.mant & F::HIDDEN_BIT_MASK.as_u64()) == 0 {
            exp = 0;
        } else {
            exp = (fp.exp + F::EXPONENT_BIAS) as u64;
        }
        let exp = exp << F::MANTISSA_SIZE;
        let mant = fp.mant & F::MANTISSA_MASK.as_u64();
        F::from_bits(F::Unsigned::as_cast(mant | exp))
    }
}
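
// Hedged test sketch (not part of the adapted source), exercising the
// conversions above. Expected values assume rust-lexical's conventions for
// `Float::mantissa()`, `Float::exponent()`, and the rounding helpers.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn from_into_roundtrip() {
        // 1.0f64 decomposes as 2^52 * 2^-52, which is already in native
        // representation, so `into_float` can reassemble it directly:
        // exp field == -52 + 1075 == 1023, the biased exponent of 1.0.
        let fp = from_float(1.0f64);
        assert_eq!(fp, ExtendedFloat { mant: 1 << 52, exp: -52 });
        assert_eq!(into_float::<f64>(fp), 1.0);
    }

    #[test]
    fn mul_squares_exactly() {
        // 1.5 normalized: 3 << 62 at exponent -63.
        let mut x = ExtendedFloat::from_float(1.5f64);
        x.normalize();
        assert_eq!(x, ExtendedFloat { mant: 3 << 62, exp: -63 });

        // 1.5 * 1.5 == 2.25; `mul` keeps the high 64 bits of the product.
        let y = x.mul(&x);
        assert_eq!(y, ExtendedFloat { mant: 9 << 60, exp: -62 });
        assert_eq!(y.into_float::<f64>(), 2.25);
    }
}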