serde_json/lexical/rounding.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
// Adapted from https://github.com/Alexhuszagh/rust-lexical.
//! Defines rounding schemes for floating-point numbers.
use super::float::ExtendedFloat;
use super::num::*;
use super::shift::*;
use core::mem;
// MASKS
/// Calculate a scalar factor of 2 above the halfway point.
#[inline]
pub(crate) fn nth_bit(n: u64) -> u64 {
let bits: u64 = mem::size_of::<u64>() as u64 * 8;
debug_assert!(n < bits, "nth_bit() overflow in shl.");
1 << n
}
/// Generate a bitwise mask for the lower `n` bits.
#[inline]
pub(crate) fn lower_n_mask(n: u64) -> u64 {
let bits: u64 = mem::size_of::<u64>() as u64 * 8;
debug_assert!(n <= bits, "lower_n_mask() overflow in shl.");
if n == bits {
u64::MAX
} else {
(1 << n) - 1
}
}
/// Calculate the halfway point for the lower `n` bits.
#[inline]
pub(crate) fn lower_n_halfway(n: u64) -> u64 {
let bits: u64 = mem::size_of::<u64>() as u64 * 8;
debug_assert!(n <= bits, "lower_n_halfway() overflow in shl.");
if n == 0 {
0
} else {
nth_bit(n - 1)
}
}
/// Calculate a bitwise mask with `n` 1 bits starting at the `bit` position.
#[inline]
pub(crate) fn internal_n_mask(bit: u64, n: u64) -> u64 {
let bits: u64 = mem::size_of::<u64>() as u64 * 8;
debug_assert!(bit <= bits, "internal_n_halfway() overflow in shl.");
debug_assert!(n <= bits, "internal_n_halfway() overflow in shl.");
debug_assert!(bit >= n, "internal_n_halfway() overflow in sub.");
lower_n_mask(bit) ^ lower_n_mask(bit - n)
}
// NEAREST ROUNDING
// Shift right N-bytes and round to the nearest.
//
// Return if we are above halfway and if we are halfway.
#[inline]
pub(crate) fn round_nearest(fp: &mut ExtendedFloat, shift: i32) -> (bool, bool) {
// Extract the truncated bits using mask.
// Calculate if the value of the truncated bits are either above
// the mid-way point, or equal to it.
//
// For example, for 4 truncated bytes, the mask would be b1111
// and the midway point would be b1000.
let mask: u64 = lower_n_mask(shift as u64);
let halfway: u64 = lower_n_halfway(shift as u64);
let truncated_bits = fp.mant & mask;
let is_above = truncated_bits > halfway;
let is_halfway = truncated_bits == halfway;
// Bit shift so the leading bit is in the hidden bit.
overflowing_shr(fp, shift);
(is_above, is_halfway)
}
// Tie rounded floating point to event.
#[inline]
pub(crate) fn tie_even(fp: &mut ExtendedFloat, is_above: bool, is_halfway: bool) {
// Extract the last bit after shifting (and determine if it is odd).
let is_odd = fp.mant & 1 == 1;
// Calculate if we need to roundup.
// We need to roundup if we are above halfway, or if we are odd
// and at half-way (need to tie-to-even).
if is_above || (is_odd && is_halfway) {
fp.mant += 1;
}
}
// Shift right N-bytes and round nearest, tie-to-even.
//
// Floating-point arithmetic uses round to nearest, ties to even,
// which rounds to the nearest value, if the value is halfway in between,
// round to an even value.
#[inline]
pub(crate) fn round_nearest_tie_even(fp: &mut ExtendedFloat, shift: i32) {
let (is_above, is_halfway) = round_nearest(fp, shift);
tie_even(fp, is_above, is_halfway);
}
// DIRECTED ROUNDING
// Shift right N-bytes and round towards a direction.
//
// Return if we have any truncated bytes.
#[inline]
fn round_toward(fp: &mut ExtendedFloat, shift: i32) -> bool {
let mask: u64 = lower_n_mask(shift as u64);
let truncated_bits = fp.mant & mask;
// Bit shift so the leading bit is in the hidden bit.
overflowing_shr(fp, shift);
truncated_bits != 0
}
// Round down.
#[inline]
fn downard(_: &mut ExtendedFloat, _: bool) {}
// Shift right N-bytes and round toward zero.
//
// Floating-point arithmetic defines round toward zero, which rounds
// towards positive zero.
#[inline]
pub(crate) fn round_downward(fp: &mut ExtendedFloat, shift: i32) {
// Bit shift so the leading bit is in the hidden bit.
// No rounding schemes, so we just ignore everything else.
let is_truncated = round_toward(fp, shift);
downard(fp, is_truncated);
}
// ROUND TO FLOAT
// Shift the ExtendedFloat fraction to the fraction bits in a native float.
//
// Floating-point arithmetic uses round to nearest, ties to even,
// which rounds to the nearest value, if the value is halfway in between,
// round to an even value.
#[inline]
pub(crate) fn round_to_float<F, Algorithm>(fp: &mut ExtendedFloat, algorithm: Algorithm)
where
F: Float,
Algorithm: FnOnce(&mut ExtendedFloat, i32),
{
// Calculate the difference to allow a single calculation
// rather than a loop, to minimize the number of ops required.
// This does underflow detection.
let final_exp = fp.exp + F::DEFAULT_SHIFT;
if final_exp < F::DENORMAL_EXPONENT {
// We would end up with a denormal exponent, try to round to more
// digits. Only shift right if we can avoid zeroing out the value,
// which requires the exponent diff to be < M::BITS. The value
// is already normalized, so we shouldn't have any issue zeroing
// out the value.
let diff = F::DENORMAL_EXPONENT - fp.exp;
if diff <= u64::FULL {
// We can avoid underflow, can get a valid representation.
algorithm(fp, diff);
} else {
// Certain underflow, assign literal 0s.
fp.mant = 0;
fp.exp = 0;
}
} else {
algorithm(fp, F::DEFAULT_SHIFT);
}
if fp.mant & F::CARRY_MASK == F::CARRY_MASK {
// Roundup carried over to 1 past the hidden bit.
shr(fp, 1);
}
}
// AVOID OVERFLOW/UNDERFLOW
// Avoid overflow for large values, shift left as needed.
//
// Shift until a 1-bit is in the hidden bit, if the mantissa is not 0.
#[inline]
pub(crate) fn avoid_overflow<F>(fp: &mut ExtendedFloat)
where
F: Float,
{
// Calculate the difference to allow a single calculation
// rather than a loop, minimizing the number of ops required.
if fp.exp >= F::MAX_EXPONENT {
let diff = fp.exp - F::MAX_EXPONENT;
if diff <= F::MANTISSA_SIZE {
// Our overflow mask needs to start at the hidden bit, or at
// `F::MANTISSA_SIZE+1`, and needs to have `diff+1` bits set,
// to see if our value overflows.
let bit = (F::MANTISSA_SIZE + 1) as u64;
let n = (diff + 1) as u64;
let mask = internal_n_mask(bit, n);
if (fp.mant & mask) == 0 {
// If we have no 1-bit in the hidden-bit position,
// which is index 0, we need to shift 1.
let shift = diff + 1;
shl(fp, shift);
}
}
}
}
// ROUND TO NATIVE
// Round an extended-precision float to a native float representation.
#[inline]
pub(crate) fn round_to_native<F, Algorithm>(fp: &mut ExtendedFloat, algorithm: Algorithm)
where
F: Float,
Algorithm: FnOnce(&mut ExtendedFloat, i32),
{
// Shift all the way left, to ensure a consistent representation.
// The following right-shifts do not work for a non-normalized number.
fp.normalize();
// Round so the fraction is in a native mantissa representation,
// and avoid overflow/underflow.
round_to_float::<F, _>(fp, algorithm);
avoid_overflow::<F>(fp);
}