serde_json/lexical/
float.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
// Adapted from https://github.com/Alexhuszagh/rust-lexical.

// FLOAT TYPE

use super::num::*;
use super::rounding::*;
use super::shift::*;

/// Extended precision floating-point type.
///
/// Private implementation, exposed only for testing purposes.
#[doc(hidden)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct ExtendedFloat {
    /// Mantissa for the extended-precision float.
    pub mant: u64,
    /// Binary exponent for the extended-precision float.
    pub exp: i32,
}

impl ExtendedFloat {
    // PROPERTIES

    // OPERATIONS

    /// Multiply two normalized extended-precision floats, as if by `a*b`.
    ///
    /// The precision is maximal when the numbers are normalized, however,
    /// decent precision will occur as long as both values have high bits
    /// set. The result is not normalized.
    ///
    /// Algorithm:
    ///     1. Non-signed multiplication of mantissas (requires 2x as many bits as input).
    ///     2. Normalization of the result (not done here).
    ///     3. Addition of exponents.
    pub(crate) fn mul(&self, b: &ExtendedFloat) -> ExtendedFloat {
        // Logic check, values must be decently normalized prior to multiplication.
        debug_assert!((self.mant & u64::HIMASK != 0) && (b.mant & u64::HIMASK != 0));

        // Extract high-and-low masks.
        let ah = self.mant >> u64::HALF;
        let al = self.mant & u64::LOMASK;
        let bh = b.mant >> u64::HALF;
        let bl = b.mant & u64::LOMASK;

        // Get our products
        let ah_bl = ah * bl;
        let al_bh = al * bh;
        let al_bl = al * bl;
        let ah_bh = ah * bh;

        let mut tmp = (ah_bl & u64::LOMASK) + (al_bh & u64::LOMASK) + (al_bl >> u64::HALF);
        // round up
        tmp += 1 << (u64::HALF - 1);

        ExtendedFloat {
            mant: ah_bh + (ah_bl >> u64::HALF) + (al_bh >> u64::HALF) + (tmp >> u64::HALF),
            exp: self.exp + b.exp + u64::FULL,
        }
    }

    /// Multiply in-place, as if by `a*b`.
    ///
    /// The result is not normalized.
    #[inline]
    pub(crate) fn imul(&mut self, b: &ExtendedFloat) {
        *self = self.mul(b);
    }

    // NORMALIZE

    /// Normalize float-point number.
    ///
    /// Shift the mantissa so the number of leading zeros is 0, or the value
    /// itself is 0.
    ///
    /// Get the number of bytes shifted.
    #[inline]
    pub(crate) fn normalize(&mut self) -> u32 {
        // Note:
        // Using the cltz intrinsic via leading_zeros is way faster (~10x)
        // than shifting 1-bit at a time, via while loop, and also way
        // faster (~2x) than an unrolled loop that checks at 32, 16, 4,
        // 2, and 1 bit.
        //
        // Using a modulus of pow2 (which will get optimized to a bitwise
        // and with 0x3F or faster) is slightly slower than an if/then,
        // however, removing the if/then will likely optimize more branched
        // code as it removes conditional logic.

        // Calculate the number of leading zeros, and then zero-out
        // any overflowing bits, to avoid shl overflow when self.mant == 0.
        let shift = if self.mant == 0 {
            0
        } else {
            self.mant.leading_zeros()
        };
        shl(self, shift as i32);
        shift
    }

    // ROUND

    /// Lossy round float-point number to native mantissa boundaries.
    #[inline]
    pub(crate) fn round_to_native<F, Algorithm>(&mut self, algorithm: Algorithm)
    where
        F: Float,
        Algorithm: FnOnce(&mut ExtendedFloat, i32),
    {
        round_to_native::<F, _>(self, algorithm);
    }

    // FROM

    /// Create extended float from native float.
    #[inline]
    pub fn from_float<F: Float>(f: F) -> ExtendedFloat {
        from_float(f)
    }

    // INTO

    /// Convert into default-rounded, lower-precision native float.
    #[inline]
    pub(crate) fn into_float<F: Float>(mut self) -> F {
        self.round_to_native::<F, _>(round_nearest_tie_even);
        into_float(self)
    }

    /// Convert into downward-rounded, lower-precision native float.
    #[inline]
    pub(crate) fn into_downward_float<F: Float>(mut self) -> F {
        self.round_to_native::<F, _>(round_downward);
        into_float(self)
    }
}

// FROM FLOAT

// Import ExtendedFloat from native float.
#[inline]
pub(crate) fn from_float<F>(f: F) -> ExtendedFloat
where
    F: Float,
{
    ExtendedFloat {
        mant: u64::as_cast(f.mantissa()),
        exp: f.exponent(),
    }
}

// INTO FLOAT

// Export extended-precision float to native float.
//
// The extended-precision float must be in native float representation,
// with overflow/underflow appropriately handled.
#[inline]
pub(crate) fn into_float<F>(fp: ExtendedFloat) -> F
where
    F: Float,
{
    // Export floating-point number.
    if fp.mant == 0 || fp.exp < F::DENORMAL_EXPONENT {
        // sub-denormal, underflow
        F::ZERO
    } else if fp.exp >= F::MAX_EXPONENT {
        // overflow
        F::from_bits(F::INFINITY_BITS)
    } else {
        // calculate the exp and fraction bits, and return a float from bits.
        let exp: u64;
        if (fp.exp == F::DENORMAL_EXPONENT) && (fp.mant & F::HIDDEN_BIT_MASK.as_u64()) == 0 {
            exp = 0;
        } else {
            exp = (fp.exp + F::EXPONENT_BIAS) as u64;
        }
        let exp = exp << F::MANTISSA_SIZE;
        let mant = fp.mant & F::MANTISSA_MASK.as_u64();
        F::from_bits(F::Unsigned::as_cast(mant | exp))
    }
}