lexical_parse_float/
shared.rs

Help
1//! Shared utilities and algorithms.
2
3#![doc(hidden)]
4
5#[cfg(feature = "power-of-two")]
6use lexical_util::format::NumberFormat;
7use lexical_util::num::AsPrimitive;
8
9use crate::float::{ExtendedFloat80, RawFloat};
10use crate::mask::{lower_n_halfway, lower_n_mask};
11
12// 8 DIGIT
13// -------
14
15/// Check if we can try to parse 8 digits at one.
16#[cfg(not(feature = "compact"))]
17macro_rules! can_try_parse_multidigit {
18    ($iter:expr, $radix:expr) => {
19        $iter.is_contiguous() && (cfg!(not(feature = "power-of-two")) || $radix <= 10)
20    };
21}
22
23// POWER2
24// ------
25
26/// Calculate the shift to move the significant digits into place.
27#[inline(always)]
28pub fn calculate_shift<F: RawFloat>(power2: i32) -> i32 {
29    let mantissa_shift = 64 - F::MANTISSA_SIZE - 1;
30    if -power2 >= mantissa_shift {
31        -power2 + 1
32    } else {
33        mantissa_shift
34    }
35}
36
37/// Calculate the biased, binary exponent from the mantissa shift and exponent.
38#[inline(always)]
39#[cfg(feature = "power-of-two")]
40pub fn calculate_power2<F: RawFloat, const FORMAT: u128>(exponent: i64, ctlz: u32) -> i32 {
41    let format = NumberFormat::<{ FORMAT }> {};
42    exponent as i32 * log2(format.exponent_base()) + F::EXPONENT_BIAS - ctlz as i32
43}
44
45/// Bias for marking an invalid extended float.
46pub const INVALID_FP: i32 = i16::MIN as i32;
47
48// LOG2
49// ----
50
51/// Quick log2 that evaluates at compile time for the radix.
52/// Note that this may produce inaccurate results for other radixes:
53/// we don't care since it's only called for powers-of-two.
54#[inline(always)]
55pub const fn log2(radix: u32) -> i32 {
56    match radix {
57        2 => 1,
58        4 => 2,
59        8 => 3,
60        16 => 4,
61        32 => 5,
62        // Fallthrough to 1 for non-power-of-two radixes.
63        _ => 1,
64    }
65}
66
67// STARTS WITH
68// -----------
69
70/// Check if left iter starts with right iter.
71///
72/// This optimizes decently well, to the following ASM for pure slices:
73///
74/// ```text
75/// starts_with_slc:
76///         xor     eax, eax
77/// .LBB0_1:
78///         cmp     rcx, rax
79///         je      .LBB0_2
80///         cmp     rsi, rax
81///         je      .LBB0_5
82///         movzx   r8d, byte ptr [rdi + rax]
83///         lea     r9, [rax + 1]
84///         cmp     r8b, byte ptr [rdx + rax]
85///         mov     rax, r9
86///         je      .LBB0_1
87/// .LBB0_5:
88///         xor     eax, eax
89///         ret
90/// .LBB0_2:
91///         mov     al, 1
92///         ret
93/// ```
94#[cfg_attr(not(feature = "compact"), inline(always))]
95pub fn starts_with<'a, 'b, Iter1, Iter2>(mut x: Iter1, mut y: Iter2) -> bool
96where
97    Iter1: Iterator<Item = &'a u8>,
98    Iter2: Iterator<Item = &'b u8>,
99{
100    loop {
101        // Only call `next()` on x if y is not None, otherwise,
102        // we may incorrectly consume an x character.
103        let yi = y.next();
104        if yi.is_none() {
105            return true;
106        } else if x.next() != yi {
107            return false;
108        }
109    }
110}
111
112/// Check if left iter starts with right iter without case-sensitivity.
113///
114/// This optimizes decently well, to the following ASM for pure slices:
115///
116/// ```text
117/// starts_with_uncased:
118///         xor     eax, eax
119/// .LBB1_1:
120///         cmp     rcx, rax
121///         je      .LBB1_2
122///         cmp     rsi, rax
123///         je      .LBB1_5
124///         movzx   r8d, byte ptr [rdi + rax]
125///         xor     r8b, byte ptr [rdx + rax]
126///         add     rax, 1
127///         test    r8b, -33
128///         je      .LBB1_1
129/// .LBB1_5:
130///         xor     eax, eax
131///         ret
132/// .LBB1_2:
133///         mov     al, 1
134///         ret
135/// ```
136#[cfg_attr(not(feature = "compact"), inline(always))]
137#[allow(clippy::unwrap_used)] // reason="yi cannot be none due to previous check"
138pub fn starts_with_uncased<'a, 'b, Iter1, Iter2>(mut x: Iter1, mut y: Iter2) -> bool
139where
140    Iter1: Iterator<Item = &'a u8>,
141    Iter2: Iterator<Item = &'b u8>,
142{
143    // We use a faster optimization here for ASCII letters, which NaN
144    // and infinite strings **must** be. [A-Z] is 0x41-0x5A, while
145    // [a-z] is 0x61-0x7A. Therefore, the xor must be 0 or 32 if they
146    // are case-insensitive equal, but only if at least 1 of the inputs
147    // is an ASCII letter.
148    loop {
149        let yi = y.next();
150        if yi.is_none() {
151            return true;
152        }
153        let yi = *yi.unwrap();
154        let is_not_equal = x.next().map_or(true, |&xi| {
155            let xor = xi ^ yi;
156            xor != 0 && xor != 0x20
157        });
158        if is_not_equal {
159            return false;
160        }
161    }
162}
163
164// ROUNDING
165// --------
166
167/// Round an extended-precision float to the nearest machine float.
168///
169/// Shifts the significant digits into place, adjusts the exponent,
170/// so it can be easily converted to a native float.
171#[cfg_attr(not(feature = "compact"), inline(always))]
172pub fn round<F, Cb>(fp: &mut ExtendedFloat80, cb: Cb)
173where
174    F: RawFloat,
175    Cb: Fn(&mut ExtendedFloat80, i32),
176{
177    let fp_inf = ExtendedFloat80 {
178        mant: 0,
179        exp: F::INFINITE_POWER,
180    };
181
182    // Calculate our shift in significant digits.
183    let mantissa_shift = 64 - F::MANTISSA_SIZE - 1;
184
185    // Check for a denormal float, if after the shift the exponent is negative.
186    if -fp.exp >= mantissa_shift {
187        // Have a denormal float that isn't a literal 0.
188        // The extra 1 is to adjust for the denormal float, which is
189        // `1 - F::EXPONENT_BIAS`. This works as before, because our
190        // old logic rounded to `F::DENORMAL_EXPONENT` (now 1), and then
191        // checked if `exp == F::DENORMAL_EXPONENT` and no hidden mask
192        // bit was set. Here, we handle that here, rather than later.
193        //
194        // This might round-down to 0, but shift will be at **max** 65,
195        // for halfway cases rounding towards 0.
196        let shift = -fp.exp + 1;
197        debug_assert!(shift <= 65);
198        cb(fp, shift.min(64));
199        // Check for round-up: if rounding-nearest carried us to the hidden bit.
200        fp.exp = (fp.mant >= F::HIDDEN_BIT_MASK.as_u64()) as i32;
201        return;
202    }
203
204    // The float is normal, round to the hidden bit.
205    cb(fp, mantissa_shift);
206
207    // Check if we carried, and if so, shift the bit to the hidden bit.
208    let carry_mask = F::CARRY_MASK.as_u64();
209    if fp.mant & carry_mask == carry_mask {
210        fp.mant >>= 1;
211        fp.exp += 1;
212    }
213
214    // Handle if we carried and check for overflow again.
215    if fp.exp >= F::INFINITE_POWER {
216        // Exponent is above largest normal value, must be infinite.
217        *fp = fp_inf;
218        return;
219    }
220
221    // Remove the hidden bit.
222    fp.mant &= F::MANTISSA_MASK.as_u64();
223}
224
225/// Shift right N-bytes and round towards a direction.
226///
227/// Callback should take the following parameters:
228///     1. `is_odd`
229///     1. `is_halfway`
230///     1. `is_above`
231#[cfg_attr(not(feature = "compact"), inline(always))]
232pub fn round_nearest_tie_even<Cb>(fp: &mut ExtendedFloat80, shift: i32, cb: Cb)
233where
234    // `is_odd`, `is_halfway`, `is_above`
235    Cb: Fn(bool, bool, bool) -> bool,
236{
237    // Ensure we've already handled denormal values that underflow.
238    debug_assert!(shift <= 64);
239
240    // Extract the truncated bits using mask.
241    // Calculate if the value of the truncated bits are either above
242    // the mid-way point, or equal to it.
243    //
244    // For example, for 4 truncated bytes, the mask would be 0b1111
245    // and the midway point would be 0b1000.
246    let mask = lower_n_mask(shift as u64);
247    let halfway = lower_n_halfway(shift as u64);
248    let truncated_bits = fp.mant & mask;
249    let is_above = truncated_bits > halfway;
250    let is_halfway = truncated_bits == halfway;
251
252    // Bit shift so the leading bit is in the hidden bit.
253    // This optimizes pretty well:
254    //  ```text
255    //   mov     ecx, esi
256    //   shr     rdi, cl
257    //   xor     eax, eax
258    //   cmp     esi, 64
259    //   cmovne  rax, rdi
260    //   ret
261    //  ```
262    fp.mant = match shift == 64 {
263        true => 0,
264        false => fp.mant >> shift,
265    };
266    fp.exp += shift;
267
268    // Extract the last bit after shifting (and determine if it is odd).
269    let is_odd = fp.mant & 1 == 1;
270
271    // Calculate if we need to roundup.
272    // We need to roundup if we are above halfway, or if we are odd
273    // and at half-way (need to tie-to-even). Avoid the branch here.
274    fp.mant += cb(is_odd, is_halfway, is_above) as u64;
275}
276
277/// Round our significant digits into place, truncating them.
278#[cfg_attr(not(feature = "compact"), inline(always))]
279pub fn round_down(fp: &mut ExtendedFloat80, shift: i32) {
280    // Might have a shift greater than 64 if we have an error.
281    fp.mant = match shift == 64 {
282        true => 0,
283        false => fp.mant >> shift,
284    };
285    fp.exp += shift;
286}
lexical_parse_float/shared.rs

lexical_parse_float/
shared.rs