// serde_json/lexical/float.rs
// Adapted from https://github.com/Alexhuszagh/rust-lexical.
// FLOAT TYPE
use super::num::*;
use super::rounding::*;
use super::shift::*;
/// Extended precision floating-point type.
///
/// Private implementation, exposed only for testing purposes.
#[doc(hidden)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct ExtendedFloat {
/// Mantissa for the extended-precision float.
pub mant: u64,
/// Binary exponent for the extended-precision float.
pub exp: i32,
}
impl ExtendedFloat {
// PROPERTIES
// OPERATIONS
/// Multiply two normalized extended-precision floats, as if by `a*b`.
///
/// The precision is maximal when the numbers are normalized; however,
/// decent precision will still occur as long as both values have their
/// high bits set. The result is not normalized.
///
/// Algorithm:
/// 1. Unsigned multiplication of mantissas (the full product requires 2x as many bits as the input).
/// 2. Normalization of the result (not done here).
/// 3. Addition of exponents.
pub(crate) fn mul(&self, b: &ExtendedFloat) -> ExtendedFloat {
// Logic check, values must be decently normalized prior to multiplication.
debug_assert!((self.mant & u64::HIMASK != 0) && (b.mant & u64::HIMASK != 0));
// Extract the high and low halves of each mantissa.
let ah = self.mant >> u64::HALF;
let al = self.mant & u64::LOMASK;
let bh = b.mant >> u64::HALF;
let bl = b.mant & u64::LOMASK;
// Get our products
let ah_bl = ah * bl;
let al_bh = al * bh;
let al_bl = al * bl;
let ah_bh = ah * bh;
let mut tmp = (ah_bl & u64::LOMASK) + (al_bh & u64::LOMASK) + (al_bl >> u64::HALF);
// round up: adding 1 << (HALF - 1) rounds the discarded low half to nearest
tmp += 1 << (u64::HALF - 1);
ExtendedFloat {
mant: ah_bh + (ah_bl >> u64::HALF) + (al_bh >> u64::HALF) + (tmp >> u64::HALF),
exp: self.exp + b.exp + u64::FULL,
}
}
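// Worked example (editor's illustration, not part of the original code),
// assuming u64::HALF == 32 and u64::FULL == 64:
// normalized 3.0 is { mant: 0xC000_0000_0000_0000, exp: -62 } and
// normalized 2.0 is { mant: 0x8000_0000_0000_0000, exp: -62 }.
// `mul` keeps the high 64 bits of the 128-bit mantissa product
// (0x6000_0000_0000_0000) and adds the exponents plus 64, giving
// { mant: 0x6000_0000_0000_0000, exp: -60 }, i.e. 3 * 2^61 * 2^-60 == 6.0.
// Note the top bit of the result is clear, so it is not normalized.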
/// Multiply in-place, as if by `a*b`.
///
/// The result is not normalized.
#[inline]
pub(crate) fn imul(&mut self, b: &ExtendedFloat) {
*self = self.mul(b);
}
// NORMALIZE
/// Normalize the floating-point number.
///
/// Shift the mantissa left so that it has no leading zeros, unless the
/// value itself is 0.
///
/// Get the number of bits shifted.
#[inline]
pub(crate) fn normalize(&mut self) -> u32 {
// Note:
// Using the ctlz intrinsic via leading_zeros is way faster (~10x)
// than shifting 1-bit at a time, via while loop, and also way
// faster (~2x) than an unrolled loop that checks at 32, 16, 4,
// 2, and 1 bit.
//
// Using a modulus of pow2 (which will get optimized to a bitwise
// and with 0x3F or faster) is slightly slower than an if/then,
// however, removing the if/then will likely optimize more branched
// code as it removes conditional logic.
// Calculate the number of leading zeros, special-casing a zero
// mantissa so we never shift by the full width, which would make
// shl overflow when self.mant == 0.
let shift = if self.mant == 0 {
0
} else {
self.mant.leading_zeros()
};
shl(self, shift as i32);
shift
}
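// Worked example (editor's illustration, not part of the original code),
// assuming `shl` subtracts the shift from the exponent, as in rust-lexical:
// from_float(1.0f64) gives { mant: 1 << 52, exp: -52 }, which has 11 leading
// zeros. normalize() shifts the mantissa left by 11 and lowers the exponent
// by the same amount, yielding { mant: 1 << 63, exp: -63 } and returning 11.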
// ROUND
/// Lossily round the floating-point number to native mantissa boundaries.
#[inline]
pub(crate) fn round_to_native<F, Algorithm>(&mut self, algorithm: Algorithm)
where
F: Float,
Algorithm: FnOnce(&mut ExtendedFloat, i32),
{
round_to_native::<F, _>(self, algorithm);
}
// FROM
/// Create extended float from native float.
#[inline]
pub fn from_float<F: Float>(f: F) -> ExtendedFloat {
from_float(f)
}
// INTO
/// Convert into default-rounded, lower-precision native float.
#[inline]
pub(crate) fn into_float<F: Float>(mut self) -> F {
self.round_to_native::<F, _>(round_nearest_tie_even);
into_float(self)
}
/// Convert into downward-rounded, lower-precision native float.
#[inline]
pub(crate) fn into_downward_float<F: Float>(mut self) -> F {
self.round_to_native::<F, _>(round_downward);
into_float(self)
}
}
// FROM FLOAT
// Create an ExtendedFloat from a native float.
#[inline]
pub(crate) fn from_float<F>(f: F) -> ExtendedFloat
where
F: Float,
{
ExtendedFloat {
mant: u64::as_cast(f.mantissa()),
exp: f.exponent(),
}
}
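// Worked example (editor's illustration), assuming the `Float` accessors
// behave as in rust-lexical: for 3.0f64, `mantissa()` returns the 53-bit
// significand including the hidden bit (3 << 51) and `exponent()` returns the
// unbiased exponent minus the mantissa size (1 - 52 = -51), so
// from_float(3.0f64) == ExtendedFloat { mant: 0x18_0000_0000_0000, exp: -51 }.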
// INTO FLOAT
// Export extended-precision float to native float.
//
// The extended-precision float must be in native float representation,
// with overflow/underflow appropriately handled.
#[inline]
pub(crate) fn into_float<F>(fp: ExtendedFloat) -> F
where
F: Float,
{
// Export floating-point number.
if fp.mant == 0 || fp.exp < F::DENORMAL_EXPONENT {
// sub-denormal, underflow
F::ZERO
} else if fp.exp >= F::MAX_EXPONENT {
// overflow
F::from_bits(F::INFINITY_BITS)
} else {
// calculate the exp and fraction bits, and return a float from bits.
let exp: u64;
if (fp.exp == F::DENORMAL_EXPONENT) && (fp.mant & F::HIDDEN_BIT_MASK.as_u64()) == 0 {
exp = 0;
} else {
exp = (fp.exp + F::EXPONENT_BIAS) as u64;
}
let exp = exp << F::MANTISSA_SIZE;
let mant = fp.mant & F::MANTISSA_MASK.as_u64();
F::from_bits(F::Unsigned::as_cast(mant | exp))
}
}
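// A minimal test sketch (editor's addition, not part of the original source).
// It assumes the `Float` impl for f64 and the rounding callbacks behave as in
// rust-lexical, and exercises the round trip documented above.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn mul_and_round_trip() {
        // Normalize 3.0 and 2.0, multiply, and convert back to f64.
        let mut a = ExtendedFloat::from_float(3.0f64);
        let mut b = ExtendedFloat::from_float(2.0f64);
        a.normalize();
        b.normalize();
        let product = a.mul(&b);
        assert_eq!(product.into_float::<f64>(), 6.0);
    }

    #[test]
    fn underflow_to_zero() {
        // Any exponent far below F::DENORMAL_EXPONENT underflows to 0.0.
        let fp = ExtendedFloat { mant: 1, exp: -2000 };
        assert_eq!(into_float::<f64>(fp), 0.0);
    }
}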