lexical_parse_float/
parse.rs

Help
1//! Shared trait and methods for parsing floats.
2//!
3//! This is adapted from [fast-float-rust](https://github.com/aldanor/fast-float-rust),
4//! a port of [fast_float](https://github.com/fastfloat/fast_float) to Rust.
5
6// NOTE: We never want to disable multi-digit optimizations when parsing our floats,
7// since the nanoseconds it saves on branching is irrelevant when considering decimal
8// points and fractional digits and it majorly improves longer floats.
9
10#![doc(hidden)]
11
12#[cfg(not(feature = "compact"))]
13use lexical_parse_integer::algorithm;
14#[cfg(feature = "f16")]
15use lexical_util::bf16::bf16;
16use lexical_util::digit::{char_to_digit_const, char_to_valid_digit_const};
17use lexical_util::error::Error;
18#[cfg(feature = "f16")]
19use lexical_util::f16::f16;
20use lexical_util::format::NumberFormat;
21use lexical_util::iterator::{AsBytes, Bytes, DigitsIter, Iter};
22use lexical_util::result::Result;
23use lexical_util::step::u64_step;
24
25#[cfg(any(feature = "compact", feature = "radix"))]
26use crate::bellerophon::bellerophon;
27#[cfg(feature = "power-of-two")]
28use crate::binary::{binary, slow_binary};
29use crate::float::{extended_to_float, ExtendedFloat80, LemireFloat};
30#[cfg(not(feature = "compact"))]
31use crate::lemire::lemire;
32use crate::number::Number;
33use crate::options::Options;
34use crate::shared;
35use crate::slow::slow_radix;
36
37// API
38// ---
39
/// Check if the radix is a power-of-2.
41#[cfg(feature = "power-of-two")]
macro_rules! is_power_two {
    ($radix:expr) => {
        // Only 2, 4, 8, 16 and 32 are treated as power-of-two radixes.
        match $radix {
            2 | 4 | 8 | 16 | 32 => true,
            _ => false,
        }
    };
}
47
/// Validate the radix of a packed format against its exponent base.
///
/// Expands to a check that returns `Err(Error::InvalidRadix)` from the
/// enclosing function when the pair is not supported. A radix equal to the
/// exponent base is always accepted. With the `power-of-two` feature, the
/// mixed `(radix, exponent base)` pairs `(4, 2)`, `(8, 2)`, `(16, 2)`,
/// `(32, 2)` and `(16, 4)` are accepted as well.
macro_rules! check_radix {
    ($format:ident) => {{
        let format = NumberFormat::<{ $format }> {};
        let radix = format.radix();
        let base = format.exponent_base();

        #[cfg(feature = "power-of-two")]
        let is_supported = radix == base
            || matches!((radix, base), (4, 2) | (8, 2) | (16, 2) | (32, 2) | (16, 4));

        #[cfg(not(feature = "power-of-two"))]
        let is_supported = radix == base;

        if !is_supported {
            return Err(Error::InvalidRadix);
        }
    }};
}
73
74/// Parse integer trait, implemented in terms of the optimized back-end.
75pub trait ParseFloat: LemireFloat {
76    /// Forward complete parser parameters to the backend.
77    #[cfg_attr(not(feature = "compact"), inline(always))]
78    fn parse_complete<const FORMAT: u128>(bytes: &[u8], options: &Options) -> Result<Self> {
79        check_radix!(FORMAT);
80        parse_complete::<Self, FORMAT>(bytes, options)
81    }
82
83    /// Forward partial parser parameters to the backend.
84    #[cfg_attr(not(feature = "compact"), inline(always))]
85    fn parse_partial<const FORMAT: u128>(bytes: &[u8], options: &Options) -> Result<(Self, usize)> {
86        check_radix!(FORMAT);
87        parse_partial::<Self, FORMAT>(bytes, options)
88    }
89
90    /// Forward complete parser parameters to the backend, using only the fast
91    /// path.
92    #[cfg_attr(not(feature = "compact"), inline(always))]
93    fn fast_path_complete<const FORMAT: u128>(bytes: &[u8], options: &Options) -> Result<Self> {
94        check_radix!(FORMAT);
95        fast_path_complete::<Self, FORMAT>(bytes, options)
96    }
97
98    /// Forward partial parser parameters to the backend, using only the fast
99    /// path.
100    #[cfg_attr(not(feature = "compact"), inline(always))]
101    fn fast_path_partial<const FORMAT: u128>(
102        bytes: &[u8],
103        options: &Options,
104    ) -> Result<(Self, usize)> {
105        check_radix!(FORMAT);
106        fast_path_partial::<Self, FORMAT>(bytes, options)
107    }
108}
109
110macro_rules! parse_float_impl {
111    ($($t:ty)*) => ($(
112        impl ParseFloat for $t {}
113    )*)
114}
115
116parse_float_impl! { f32 f64 }
117
/// Implement `ParseFloat` for half-precision types by parsing the input as an
/// `f32` and converting the result with `from_f32`.
///
/// Each override validates the radix / exponent-base pair with `check_radix!`
/// before parsing, exactly like the default trait methods do. Without that
/// check an unsupported pair would reach the back-end unvalidated for these
/// types only.
#[cfg(feature = "f16")]
macro_rules! parse_float_as_f32 {
    ($($t:ty)*) => ($(
        impl ParseFloat for $t {
            #[cfg_attr(not(feature = "compact"), inline(always))]
            fn parse_complete<const FORMAT: u128>(bytes: &[u8], options: &Options)
                -> Result<Self>
            {
                // Reject unsupported radix combinations up front.
                check_radix!(FORMAT);
                Ok(Self::from_f32(parse_complete::<f32, FORMAT>(bytes, options)?))
            }

            #[cfg_attr(not(feature = "compact"), inline(always))]
            fn parse_partial<const FORMAT: u128>(bytes: &[u8], options: &Options)
                -> Result<(Self, usize)>
            {
                // Reject unsupported radix combinations up front.
                check_radix!(FORMAT);
                let (float, count) = parse_partial::<f32, FORMAT>(bytes, options)?;
                Ok((Self::from_f32(float), count))
            }

            #[cfg_attr(not(feature = "compact"), inline(always))]
            fn fast_path_complete<const FORMAT: u128>(bytes: &[u8], options: &Options)
                -> Result<Self>
            {
                // Reject unsupported radix combinations up front.
                check_radix!(FORMAT);
                Ok(Self::from_f32(fast_path_complete::<f32, FORMAT>(bytes, options)?))
            }

            #[cfg_attr(not(feature = "compact"), inline(always))]
            fn fast_path_partial<const FORMAT: u128>(bytes: &[u8], options: &Options)
                -> Result<(Self, usize)>
            {
                // Reject unsupported radix combinations up front.
                check_radix!(FORMAT);
                let (float, count) = fast_path_partial::<f32, FORMAT>(bytes, options)?;
                Ok((Self::from_f32(float), count))
            }
        }
    )*)
}

#[cfg(feature = "f16")]
parse_float_as_f32! { bf16 f16 }
157
158// PARSE
159// -----
160
161// NOTE:
162//  The partial and complete parsers are done separately because it provides
163//  minor optimizations when parsing invalid input, and the logic is slightly
164//  different internally. Most of the code is shared, so the duplicated
165//  code is only like 30 lines.
166
167/// Parse the sign from the leading digits.
168#[cfg_attr(not(feature = "compact"), inline(always))]
169pub fn parse_mantissa_sign<const FORMAT: u128>(byte: &mut Bytes<'_, FORMAT>) -> Result<bool> {
170    let format = NumberFormat::<{ FORMAT }> {};
171    parse_sign!(
172        byte,
173        true,
174        format.no_positive_mantissa_sign(),
175        format.required_mantissa_sign(),
176        InvalidPositiveSign,
177        MissingSign
178    )
179}
180
181/// Parse the sign from the leading digits.
182#[cfg_attr(not(feature = "compact"), inline(always))]
183pub fn parse_exponent_sign<const FORMAT: u128>(byte: &mut Bytes<'_, FORMAT>) -> Result<bool> {
184    let format = NumberFormat::<{ FORMAT }> {};
185    parse_sign!(
186        byte,
187        true,
188        format.no_positive_exponent_sign(),
189        format.required_exponent_sign(),
190        InvalidPositiveExponentSign,
191        MissingExponentSign
192    )
193}
194
/// Run the regular number parser and fall back to the special-value parser.
///
/// Evaluates to the successfully parsed number. When the regular parser
/// fails, the special parser is tried on a fresh clone of the iterator: a
/// hit returns `Ok(value)` from the enclosing function, a miss returns the
/// regular parser's original error.
///
/// - `format` - The numerical format as a packed integer
/// - `byte` - The `DigitsIter` iterator
/// - `is_negative` - If the final value is negative
/// - `options` - The parsing options forwarded to both parsers
/// - `parse_normal` - The function to parse non-special numbers with
/// - `parse_special` - The function to parse special numbers with
macro_rules! parse_number {
    (
        $format:ident,
        $byte:ident,
        $is_negative:ident,
        $options:ident,
        $parse_normal:ident,
        $parse_special:ident
    ) => {{
        let parsed = $parse_normal::<$format>($byte.clone(), $is_negative, $options);
        match parsed {
            Ok(number) => number,
            Err(error) => {
                // Only consult the special parser once regular parsing failed.
                return match $parse_special::<_, $format>($byte.clone(), $is_negative, $options) {
                    Some(value) => Ok(value),
                    None => Err(error),
                };
            },
        }
    }};
}
225
/// Convert an extended float to the native type and apply the sign.
///
/// - `type` - The native floating point type.
/// - `fp` - The extended floating-point representation.
/// - `is_negative` - If the converted value should be negated.
macro_rules! to_native {
    ($type:ident, $fp:ident, $is_negative:ident) => {{
        let magnitude = extended_to_float::<$type>($fp);
        if $is_negative {
            -magnitude
        } else {
            magnitude
        }
    }};
}
239
240/// Parse a float from bytes using a complete parser.
241#[allow(clippy::missing_inline_in_public_items)] // reason = "only public for testing"
242pub fn parse_complete<F: LemireFloat, const FORMAT: u128>(
243    bytes: &[u8],
244    options: &Options,
245) -> Result<F> {
246    let mut byte = bytes.bytes::<{ FORMAT }>();
247    let is_negative = parse_mantissa_sign(&mut byte)?;
248    if byte.integer_iter().is_consumed() {
249        if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
250            || NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
251        {
252            return Err(Error::Empty(byte.cursor()));
253        } else {
254            return Ok(F::ZERO);
255        }
256    }
257
258    // Parse our a small representation of our number.
259    let num: Number<'_> =
260        parse_number!(FORMAT, byte, is_negative, options, parse_complete_number, parse_special);
261    // Try the fast-path algorithm.
262    if let Some(value) = num.try_fast_path::<_, FORMAT>() {
263        return Ok(value);
264    }
265    // Now try the moderate path algorithm.
266    let mut fp = moderate_path::<F, FORMAT>(&num, options.lossy());
267
268    // Unable to correctly round the float using the fast or moderate algorithms.
269    // Fallback to a slower, but always correct algorithm. If we have
270    // lossy, we can't be here.
271    if fp.exp < 0 {
272        debug_assert!(!options.lossy(), "lossy algorithms never use slow algorithms");
273        // Undo the invalid extended float biasing.
274        fp.exp -= shared::INVALID_FP;
275        fp = slow_path::<F, FORMAT>(num, fp);
276    }
277
278    // Convert to native float and return result.
279    Ok(to_native!(F, fp, is_negative))
280}
281
282/// Parse a float using only the fast path as a complete parser.
283#[allow(clippy::missing_inline_in_public_items)] // reason = "only public for testing"
284pub fn fast_path_complete<F: LemireFloat, const FORMAT: u128>(
285    bytes: &[u8],
286    options: &Options,
287) -> Result<F> {
288    let mut byte = bytes.bytes::<{ FORMAT }>();
289    let is_negative = parse_mantissa_sign(&mut byte)?;
290    if byte.integer_iter().is_consumed() {
291        if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
292            || NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
293        {
294            return Err(Error::Empty(byte.cursor()));
295        } else {
296            return Ok(F::ZERO);
297        }
298    }
299
300    // Parse our a small representation of our number.
301    let num =
302        parse_number!(FORMAT, byte, is_negative, options, parse_complete_number, parse_special);
303    Ok(num.force_fast_path::<_, FORMAT>())
304}
305
306/// Parse a float from bytes using a partial parser.
307#[allow(clippy::missing_inline_in_public_items)] // reason = "only public for testing"
308pub fn parse_partial<F: LemireFloat, const FORMAT: u128>(
309    bytes: &[u8],
310    options: &Options,
311) -> Result<(F, usize)> {
312    let mut byte = bytes.bytes::<{ FORMAT }>();
313    let is_negative = parse_mantissa_sign(&mut byte)?;
314    if byte.integer_iter().is_consumed() {
315        if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
316            || NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
317        {
318            return Err(Error::Empty(byte.cursor()));
319        } else {
320            return Ok((F::ZERO, byte.cursor()));
321        }
322    }
323
324    // Parse our a small representation of our number.
325    let (num, count) = parse_number!(
326        FORMAT,
327        byte,
328        is_negative,
329        options,
330        parse_partial_number,
331        parse_partial_special
332    );
333    // Try the fast-path algorithm.
334    if let Some(value) = num.try_fast_path::<_, FORMAT>() {
335        return Ok((value, count));
336    }
337    // Now try the moderate path algorithm.
338    let mut fp = moderate_path::<F, FORMAT>(&num, options.lossy());
339
340    // Unable to correctly round the float using the fast or moderate algorithms.
341    // Fallback to a slower, but always correct algorithm. If we have
342    // lossy, we can't be here.
343    if fp.exp < 0 {
344        debug_assert!(!options.lossy(), "lossy algorithms never use slow algorithms");
345        // Undo the invalid extended float biasing.
346        fp.exp -= shared::INVALID_FP;
347        fp = slow_path::<F, FORMAT>(num, fp);
348    }
349
350    // Convert to native float and return result.
351    Ok((to_native!(F, fp, is_negative), count))
352}
353
354/// Parse a float using only the fast path as a partial parser.
355#[allow(clippy::missing_inline_in_public_items)] // reason = "only public for testing"
356pub fn fast_path_partial<F: LemireFloat, const FORMAT: u128>(
357    bytes: &[u8],
358    options: &Options,
359) -> Result<(F, usize)> {
360    let mut byte = bytes.bytes::<{ FORMAT }>();
361    let is_negative = parse_mantissa_sign(&mut byte)?;
362    if byte.integer_iter().is_consumed() {
363        if NumberFormat::<FORMAT>::REQUIRED_INTEGER_DIGITS
364            || NumberFormat::<FORMAT>::REQUIRED_MANTISSA_DIGITS
365        {
366            return Err(Error::Empty(byte.cursor()));
367        } else {
368            return Ok((F::ZERO, byte.cursor()));
369        }
370    }
371
372    // Parse our a small representation of our number.
373    let (num, count) = parse_number!(
374        FORMAT,
375        byte,
376        is_negative,
377        options,
378        parse_partial_number,
379        parse_partial_special
380    );
381    Ok((num.force_fast_path::<_, FORMAT>(), count))
382}
383
384// PATHS
385// -----
386
387/// Wrapper for different moderate-path algorithms.
388/// A return exponent of `-1` indicates an invalid value.
389#[must_use]
390#[inline(always)]
391pub fn moderate_path<F: LemireFloat, const FORMAT: u128>(
392    num: &Number,
393    lossy: bool,
394) -> ExtendedFloat80 {
395    #[cfg(feature = "compact")]
396    {
397        #[cfg(feature = "power-of-two")]
398        {
399            let format = NumberFormat::<{ FORMAT }> {};
400            if is_power_two!(format.mantissa_radix()) {
401                // Implement the power-of-two backends.
402                binary::<F, FORMAT>(num, lossy)
403            } else {
404                bellerophon::<F, FORMAT>(num, lossy)
405            }
406        }
407
408        #[cfg(not(feature = "power-of-two"))]
409        {
410            bellerophon::<F, FORMAT>(num, lossy)
411        }
412    }
413
414    #[cfg(not(feature = "compact"))]
415    {
416        #[cfg(feature = "radix")]
417        {
418            let format = NumberFormat::<{ FORMAT }> {};
419            let radix = format.mantissa_radix();
420            if radix == 10 {
421                lemire::<F>(num, lossy)
422            } else if is_power_two!(radix) {
423                // Implement the power-of-two backends.
424                binary::<F, FORMAT>(num, lossy)
425            } else {
426                bellerophon::<F, FORMAT>(num, lossy)
427            }
428        }
429
430        #[cfg(all(feature = "power-of-two", not(feature = "radix")))]
431        {
432            let format = NumberFormat::<{ FORMAT }> {};
433            let radix = format.mantissa_radix();
434            debug_assert!(matches!(radix, 2 | 4 | 8 | 10 | 16 | 32));
435            if radix == 10 {
436                lemire::<F>(num, lossy)
437            } else {
438                // Implement the power-of-two backends.
439                binary::<F, FORMAT>(num, lossy)
440            }
441        }
442
443        #[cfg(not(feature = "power-of-two"))]
444        {
445            lemire::<F>(num, lossy)
446        }
447    }
448}
449
450/// Invoke the slow path.
451/// At this point, the float string has already been validated.
452#[must_use]
453#[inline(always)]
454pub fn slow_path<F: LemireFloat, const FORMAT: u128>(
455    num: Number,
456    fp: ExtendedFloat80,
457) -> ExtendedFloat80 {
458    #[cfg(not(feature = "power-of-two"))]
459    {
460        slow_radix::<F, FORMAT>(num, fp)
461    }
462
463    #[cfg(feature = "power-of-two")]
464    {
465        let format = NumberFormat::<{ FORMAT }> {};
466        if is_power_two!(format.mantissa_radix()) {
467            slow_binary::<F, FORMAT>(num)
468        } else {
469            slow_radix::<F, FORMAT>(num, fp)
470        }
471    }
472}
473
474// NUMBER
475// ------
476
/// Parse a partial, non-special floating point number.
///
/// This creates a representation of the float as the
/// significant digits and the decimal exponent.
///
/// - `byte` - The byte iterator to parse from; it must not be empty.
/// - `is_negative` - Stored unchanged in the returned `Number`.
/// - `options` - Supplies the decimal point and exponent characters.
/// - `IS_PARTIAL` - Only influences which error is reported when mantissa
///   digits are required but none were found.
///
/// On success, returns the parsed `Number` and the cursor position after the
/// (optional) base suffix.
///
/// # Errors
///
/// Depending on the format flags, returns `EmptyInteger`,
/// `InvalidLeadingZeros`, `EmptyFraction`, `EmptyMantissa`, `InvalidDigit`,
/// `InvalidExponent`, `ExponentWithoutFraction`, `EmptyExponent` or
/// `MissingExponent`, as well as any error from `parse_exponent_sign`.
#[cfg_attr(not(feature = "compact"), inline(always))]
#[allow(unused_mut)] // reason = "used when format is enabled"
#[allow(clippy::unwrap_used)] // reason = "developer error if we incorrectly assume an overflow"
#[allow(clippy::collapsible_if)] // reason = "more readable uncollapsed"
#[allow(clippy::cast_possible_wrap)] // reason = "no hardware supports buffers >= i64::MAX"
#[allow(clippy::too_many_lines)] // reason = "function is one logical entity"
pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
    mut byte: Bytes<'a, FORMAT>,
    is_negative: bool,
    options: &Options,
) -> Result<(Number<'a>, usize)> {
    //  NOTE:
    //      There are no satisfactory optimizations to reduce the number
    //      of multiplications for very long input strings, but this will
    //      be a small fraction of the performance penalty anyway.
    //
    //      We've tried:
    //          - checking for explicit overflow, via `overflowing_mul`.
    //          - counting the max number of steps.
    //          - subslicing the string, and only processing the first `step`
    //            digits.
    //          - pre-computing the maximum power, and only adding until then.
    //
    //      All of these lead to substantial performance penalty.
    //      If we pre-parse the string, then only process it then, we
    //      get a performance penalty of ~2.5x (20ns to 50ns) for common
    //      floats, an unacceptable cost, while only improving performance
    //      for rare floats 5-25% (9.3µs to 7.5µs for denormal with 6400
    //      digits, and 7.8µs to 7.4µs for large floats with 6400 digits).
    //
    //      The performance cost is **almost** entirely in this function,
    //      but additional branching **does** not improve performance,
    //      and pre-tokenization is a recipe for failure. For halfway
    //      cases with smaller numbers of digits, the majority of the
    //      performance cost is in the big integer arithmetic (`pow` and
    //      `parse_mantissa`), which suggests few optimizations can or should
    //      be made.

    // Config options
    let format = NumberFormat::<{ FORMAT }> {};
    let decimal_point = options.decimal_point();
    let exponent_character = options.exponent();
    debug_assert!(format.is_valid(), "should have already checked for an invalid number format");
    debug_assert!(!byte.is_buffer_empty(), "should have previously checked for empty input");
    // Used to rescale the implicit exponent when the mantissa radix and the
    // exponent base differ.
    let bits_per_digit = shared::log2(format.mantissa_radix()) as i64;
    let bits_per_base = shared::log2(format.exponent_base()) as i64;

    // INTEGER

    // Check to see if we have a valid base prefix.
    #[allow(unused_variables)]
    let mut is_prefix = false;
    #[cfg(feature = "format")]
    {
        let base_prefix = format.base_prefix();
        let mut iter = byte.integer_iter();
        if base_prefix != 0 && iter.read_if_value_cased(b'0').is_some() {
            // Check to see if the next character is the base prefix.
            // We must have a format like `0x`, `0d`, `0o`.
            // NOTE: The check for empty integer digits happens below so
            // we don't need a redundant check here.
            is_prefix = true;
            if iter.read_if_value(base_prefix, format.case_sensitive_base_prefix()).is_some()
                && iter.is_buffer_empty()
                && format.required_integer_digits()
            {
                return Err(Error::EmptyInteger(iter.cursor()));
            }
        }
    }

    // Parse our integral digits.
    // The mantissa is accumulated with wrapping arithmetic; a possible
    // overflow is detected afterwards from the digit count (see
    // `CHECK OVERFLOW` below) and the mantissa is then re-parsed.
    let mut mantissa = 0_u64;
    let start = byte.clone();
    #[cfg(not(feature = "compact"))]
    parse_8digits::<_, FORMAT>(byte.integer_iter(), &mut mantissa);
    parse_digits::<_, _, FORMAT>(byte.integer_iter(), |digit| {
        mantissa = mantissa.wrapping_mul(format.radix() as u64).wrapping_add(digit as u64);
    });
    let mut n_digits = byte.current_count() - start.current_count();
    #[cfg(feature = "format")]
    if format.required_integer_digits() && n_digits == 0 {
        return Err(Error::EmptyInteger(byte.cursor()));
    }

    // Store the integer digits for slow-path algorithms.
    // NOTE: We can't use the number of digits to extract the slice for
    // non-contiguous iterators, but we also need the number of digits
    // for our value calculation. We store both, and let the compiler know
    // to optimize it out when not needed.
    let b_digits = if cfg!(feature = "format") && !byte.integer_iter().is_contiguous() {
        byte.cursor() - start.cursor()
    } else {
        n_digits
    };
    debug_assert!(
        b_digits <= start.as_slice().len(),
        "number of digits parsed must <= buffer length"
    );
    // SAFETY: safe, since `b_digits <= start.as_slice().len()`.
    // This is since `byte.len() >= start.len()` but has to have
    // the same end bounds (that is, `start = byte.clone()`), so
    // `0 <= byte.current_count() <= start.current_count() <= start.len()`
    // so, this will always return only the integer digits.
    //
    // NOTE: Removing this code leads to a ~10% reduction in performance
    // when parsing triggers the Eisel-Lemire algorithm or the digit comp
    // algorithms, so don't remove the unsafe indexing.
    let integer_digits = unsafe { start.as_slice().get_unchecked(..b_digits) };

    // Check if integer leading zeros are disabled.
    #[cfg(feature = "format")]
    if !is_prefix && format.no_float_leading_zeros() {
        if integer_digits.len() > 1 && integer_digits.first() == Some(&b'0') {
            return Err(Error::InvalidLeadingZeros(start.cursor()));
        }
    }

    // FRACTION

    // Handle decimal point and digits afterwards.
    let mut n_after_dot = 0;
    let mut exponent = 0_i64;
    let mut implicit_exponent: i64;
    // Number of integer digits, used below to recompute the exponent when
    // the mantissa is truncated.
    let int_end = n_digits as i64;
    let mut fraction_digits = None;
    let has_decimal = byte.first_is_cased(decimal_point);
    if has_decimal {
        // SAFETY: byte cannot be empty due to `first_is`
        unsafe { byte.step_unchecked() };
        let before = byte.clone();
        #[cfg(not(feature = "compact"))]
        parse_8digits::<_, FORMAT>(byte.fraction_iter(), &mut mantissa);
        parse_digits::<_, _, FORMAT>(byte.fraction_iter(), |digit| {
            mantissa = mantissa.wrapping_mul(format.radix() as u64).wrapping_add(digit as u64);
        });
        n_after_dot = byte.current_count() - before.current_count();
        // NOTE: We can't use the number of digits to extract the slice for
        // non-contiguous iterators, but we also need the number of digits
        // for our value calculation. We store both, and let the compiler know
        // to optimize it out when not needed.
        let b_after_dot = if cfg!(feature = "format") && !byte.fraction_iter().is_contiguous() {
            byte.cursor() - before.cursor()
        } else {
            n_after_dot
        };

        // Store the fraction digits for slow-path algorithms.
        debug_assert!(
            b_after_dot <= before.as_slice().len(),
            "digits after dot must be smaller than buffer"
        );
        // SAFETY: safe, since `b_after_dot <= before.as_slice().len()`.
        fraction_digits = Some(unsafe { before.as_slice().get_unchecked(..b_after_dot) });

        // Calculate the implicit exponent: the number of digits after the dot.
        implicit_exponent = -(n_after_dot as i64);
        if format.mantissa_radix() == format.exponent_base() {
            exponent = implicit_exponent;
        } else {
            // Rescale from mantissa digits to exponent-base units.
            debug_assert!(bits_per_digit % bits_per_base == 0, "exponent must be a power of base");
            exponent = implicit_exponent * bits_per_digit / bits_per_base;
        };
        #[cfg(feature = "format")]
        if format.required_fraction_digits() && n_after_dot == 0 {
            return Err(Error::EmptyFraction(byte.cursor()));
        }
    }

    // NOTE: Check if we have our exponent **BEFORE** checking if the
    // mantissa is empty, so we can ensure the empty-mantissa check below
    // knows whether an exponent follows and reports the matching error.
    let has_exponent = byte
        .first_is(exponent_character, format.case_sensitive_exponent() && cfg!(feature = "format"));

    // Check that we have mantissa digits when the format requires them.
    n_digits += n_after_dot;
    if format.required_mantissa_digits()
        && (n_digits == 0 || (cfg!(feature = "format") && byte.current_count() == 0))
    {
        let any_digits = start.clone().integer_iter().peek().is_some();
        // NOTE: This is because numbers like `_12.34` have significant digits,
        // they just don't have a valid digit (#97).
        if has_decimal || has_exponent || !any_digits || IS_PARTIAL {
            return Err(Error::EmptyMantissa(byte.cursor()));
        } else {
            return Err(Error::InvalidDigit(start.cursor()));
        }
    }

    // EXPONENT

    // Handle scientific notation.
    let mut explicit_exponent = 0_i64;
    if has_exponent {
        // NOTE: See above for the safety invariant above `required_mantissa_digits`.
        // This is separated for correctness concerns, and therefore the two cannot
        // be on the same line.
        // SAFETY: byte cannot be empty due to `first_is` from `has_exponent`.
        unsafe { byte.step_unchecked() };

        // Check float format syntax checks.
        #[cfg(feature = "format")]
        {
            // NOTE: We've already stepped past the exponent character above,
            // so `cursor() - 1` points back at it for the error index.
            if format.no_exponent_notation() {
                return Err(Error::InvalidExponent(byte.cursor() - 1));
            }
            // Check if the format forbids an exponent without a fraction
            // and no fraction was present.
            if format.no_exponent_without_fraction() && fraction_digits.is_none() {
                return Err(Error::ExponentWithoutFraction(byte.cursor() - 1));
            }
        }

        let is_negative_exponent = parse_exponent_sign(&mut byte)?;
        let before = byte.current_count();
        parse_digits::<_, _, FORMAT>(byte.exponent_iter(), |digit| {
            // Stop accumulating once the value reaches `0x10000000`, which
            // keeps the exponent bounded however many digits follow.
            // NOTE(review): this multiplies by `format.radix()`; confirm that
            // is intended for formats whose exponent digits use another radix.
            if explicit_exponent < 0x10000000 {
                explicit_exponent *= format.radix() as i64;
                explicit_exponent += digit as i64;
            }
        });
        if format.required_exponent_digits() && byte.current_count() - before == 0 {
            return Err(Error::EmptyExponent(byte.cursor()));
        }
        // Handle our sign, and get the explicit part of the exponent.
        explicit_exponent = if is_negative_exponent {
            -explicit_exponent
        } else {
            explicit_exponent
        };
        exponent += explicit_exponent;
    } else if cfg!(feature = "format") && format.required_exponent_notation() {
        return Err(Error::MissingExponent(byte.cursor()));
    }

    // Check to see if we have a valid base suffix.
    // We've already trimmed any leading digit separators here, so we can be safe
    // that the first character **is not** a digit separator.
    #[allow(unused_variables)]
    let base_suffix = format.base_suffix();
    #[cfg(feature = "format")]
    if base_suffix != 0 {
        if byte.first_is(base_suffix, format.case_sensitive_base_suffix()) {
            // SAFETY: safe since `byte.len() >= 1`.
            unsafe { byte.step_unchecked() };
        }
    }

    // CHECK OVERFLOW

    // Get the number of parsed digits (total), and redo if we had overflow.
    let end = byte.cursor();
    // NOTE(review): `step` is presumably the number of digits in this radix
    // that always fit in a `u64` - confirm against `u64_step`.
    let mut step = u64_step(format.radix());
    let mut many_digits = false;
    #[cfg(feature = "format")]
    if !format.required_mantissa_digits() && n_digits == 0 {
        exponent = 0;
    }
    // Few enough digits: the wrapping accumulation above is taken as exact.
    if n_digits <= step {
        return Ok((
            Number {
                exponent,
                mantissa,
                is_negative,
                many_digits: false,
                integer: integer_digits,
                fraction: fraction_digits,
            },
            end,
        ));
    }

    // Check for leading zeros, and to see if we had a false overflow.
    n_digits -= step;
    let mut zeros = start.clone();
    let mut zeros_integer = zeros.integer_iter();
    n_digits = n_digits.saturating_sub(zeros_integer.skip_zeros());
    if zeros.first_is_cased(decimal_point) {
        // SAFETY: safe since zeros cannot be empty due to `first_is`
        unsafe { zeros.step_unchecked() };
    }
    let mut zeros_fraction = zeros.fraction_iter();
    n_digits = n_digits.saturating_sub(zeros_fraction.skip_zeros());

    // OVERFLOW

    // Now, check if we explicitly overflowed.
    if n_digits > 0 {
        // Have more than 19 significant digits, so we overflowed.
        // Re-parse the mantissa from the stored digit slices, keeping only
        // as many digits as `step` allows.
        many_digits = true;
        mantissa = 0;
        let mut integer = integer_digits.bytes::<{ FORMAT }>();
        // Skip leading zeros, so we can use the step properly.
        let mut integer_iter = integer.integer_iter();
        integer_iter.skip_zeros();
        parse_u64_digits::<_, FORMAT>(integer_iter, &mut mantissa, &mut step);
        // NOTE: With the format feature enabled and non-contiguous iterators, we can
        // have null fraction digits even if step was not 0. We want to make the
        // none check as late in there as possible: any of them should
        // short-circuit and should be determined at compile time. So, the
        // conditions are either:
        // 1. Step == 0
        // 2. `cfg!(feature = "format") && !byte.is_contiguous() &&
        //    fraction_digits.is_none()`
        implicit_exponent = if step == 0
            || (cfg!(feature = "format") && !byte.is_contiguous() && fraction_digits.is_none())
        {
            // Filled our mantissa with just the integer.
            int_end - integer.current_count() as i64
        } else {
            // We know this can't be a None since we had more than 19
            // digits previously, so we overflowed a 64-bit integer,
            // but parsing only the integral digits produced less
            // than 19 digits. That means we must have a decimal
            // point, and at least 1 fractional digit.
            let mut fraction = fraction_digits.unwrap().bytes::<{ FORMAT }>();
            let mut fraction_iter = fraction.fraction_iter();
            // Skip leading zeros, so we can use the step properly.
            if mantissa == 0 {
                fraction_iter.skip_zeros();
            }
            parse_u64_digits::<_, FORMAT>(fraction_iter, &mut mantissa, &mut step);
            -(fraction.current_count() as i64)
        };
        if format.mantissa_radix() == format.exponent_base() {
            exponent = implicit_exponent;
        } else {
            // Rescale from mantissa digits to exponent-base units.
            debug_assert!(bits_per_digit % bits_per_base == 0, "exponent must be a power of base");
            exponent = implicit_exponent * bits_per_digit / bits_per_base;
        };
        // Add back the explicit exponent.
        exponent += explicit_exponent;
    }

    Ok((
        Number {
            exponent,
            mantissa,
            is_negative,
            many_digits,
            integer: integer_digits,
            fraction: fraction_digits,
        },
        end,
    ))
}
827
828pub fn parse_partial_number<'a, const FORMAT: u128>(
829    byte: Bytes<'a, FORMAT>,
830    is_negative: bool,
831    options: &Options,
832) -> Result<(Number<'a>, usize)> {
833    parse_number::<FORMAT, true>(byte, is_negative, options)
834}
835
836/// Try to parse a non-special floating point number.
837#[inline(always)]
838pub fn parse_complete_number<'a, const FORMAT: u128>(
839    byte: Bytes<'a, FORMAT>,
840    is_negative: bool,
841    options: &Options,
842) -> Result<Number<'a>> {
843    // Then have a const `IsPartial` as well
844    let length = byte.buffer_length();
845    let (float, count) = parse_number::<FORMAT, false>(byte, is_negative, options)?;
846    if count == length {
847        Ok(float)
848    } else {
849        Err(Error::InvalidDigit(count))
850    }
851}
852
853// DIGITS
854// ------
855
856/// Iteratively parse and consume digits from bytes.
857#[inline(always)]
858pub fn parse_digits<'a, Iter, Cb, const FORMAT: u128>(mut iter: Iter, mut cb: Cb)
859where
860    Iter: DigitsIter<'a>,
861    Cb: FnMut(u32),
862{
863    let format = NumberFormat::<{ FORMAT }> {};
864    let radix = format.radix();
865    while let Some(&c) = iter.peek() {
866        match char_to_digit_const(c, radix) {
867            Some(v) => cb(v),
868            None => break,
869        }
870        // SAFETY: iter cannot be empty due to `iter.peek()`.
871        // NOTE: Because of the match statement, this would optimize poorly with
872        // `read_if`.
873        unsafe { iter.step_unchecked() };
874        iter.increment_count();
875    }
876}
877
878/// Iteratively parse and consume digits in intervals of 8.
879#[inline(always)]
880#[cfg(not(feature = "compact"))]
881pub fn parse_8digits<'a, Iter, const FORMAT: u128>(mut iter: Iter, mantissa: &mut u64)
882where
883    Iter: DigitsIter<'a>,
884{
885    let format = NumberFormat::<{ FORMAT }> {};
886    let radix: u64 = format.radix() as u64;
887    if can_try_parse_multidigit!(iter, radix) {
888        debug_assert!(radix < 16, "radices over 16 will overflow with radix^8");
889        let radix8 = format.radix8() as u64;
890        // Can do up to 2 iterations without overflowing, however, for large
891        // inputs, this is much faster than any other alternative.
892        while let Some(v) = algorithm::try_parse_8digits::<u64, _, FORMAT>(&mut iter) {
893            *mantissa = mantissa.wrapping_mul(radix8).wrapping_add(v);
894        }
895    }
896}
897
898/// Iteratively parse and consume digits without overflowing.
899///
900/// # Preconditions
901///
902/// There must be at least `step` digits left in iterator.
903#[cfg_attr(not(feature = "compact"), inline(always))]
904pub fn parse_u64_digits<'a, Iter, const FORMAT: u128>(
905    mut iter: Iter,
906    mantissa: &mut u64,
907    step: &mut usize,
908) where
909    Iter: DigitsIter<'a>,
910{
911    let format = NumberFormat::<{ FORMAT }> {};
912    let radix = format.radix() as u64;
913
914    // Try to parse 8 digits at a time, if we can.
915    #[cfg(not(feature = "compact"))]
916    if can_try_parse_multidigit!(iter, radix) {
917        debug_assert!(radix < 16, "radices over 16 will overflow with radix^8");
918        let radix8 = format.radix8() as u64;
919        while *step > 8 {
920            if let Some(v) = algorithm::try_parse_8digits::<u64, _, FORMAT>(&mut iter) {
921                *mantissa = mantissa.wrapping_mul(radix8).wrapping_add(v);
922                *step -= 8;
923            } else {
924                break;
925            }
926        }
927    }
928
929    // Parse single digits at a time.
930    while let Some(&c) = iter.peek() {
931        if *step > 0 {
932            let digit = char_to_valid_digit_const(c, radix as u32);
933            *mantissa = *mantissa * radix + digit as u64;
934            *step -= 1;
935            // SAFETY: safe, since `iter` cannot be empty due to `iter.peek()`.
936            unsafe { iter.step_unchecked() };
937            iter.increment_count();
938        } else {
939            break;
940        }
941    }
942}
943
944// SPECIAL
945// -------
946
947/// Determine if the input data matches the special string.
948/// If there's no match, returns 0. Otherwise, returns the byte's cursor.
949#[must_use]
950#[inline(always)]
951pub fn is_special_eq<const FORMAT: u128>(mut byte: Bytes<FORMAT>, string: &'static [u8]) -> usize {
952    let format = NumberFormat::<{ FORMAT }> {};
953    if cfg!(feature = "format") && format.case_sensitive_special() {
954        if shared::starts_with(byte.special_iter(), string.iter()) {
955            // Trim the iterator afterwards.
956            byte.special_iter().peek();
957            return byte.cursor();
958        }
959    } else if shared::starts_with_uncased(byte.special_iter(), string.iter()) {
960        // Trim the iterator afterwards.
961        byte.special_iter().peek();
962        return byte.cursor();
963    }
964    0
965}
966
967/// Parse a positive representation of a special, non-finite float.
968#[must_use]
969#[cfg_attr(not(feature = "compact"), inline(always))]
970pub fn parse_positive_special<F, const FORMAT: u128>(
971    byte: Bytes<FORMAT>,
972    options: &Options,
973) -> Option<(F, usize)>
974where
975    F: LemireFloat,
976{
977    let format = NumberFormat::<{ FORMAT }> {};
978    if cfg!(feature = "format") && format.no_special() {
979        return None;
980    }
981
982    let cursor = byte.cursor();
983    let length = byte.buffer_length() - cursor;
984    if let Some(nan_string) = options.nan_string() {
985        if length >= nan_string.len() {
986            let count = is_special_eq::<FORMAT>(byte.clone(), nan_string);
987            if count != 0 {
988                return Some((F::NAN, count));
989            }
990        }
991    }
992    if let Some(infinity_string) = options.infinity_string() {
993        if length >= infinity_string.len() {
994            let count = is_special_eq::<FORMAT>(byte.clone(), infinity_string);
995            if count != 0 {
996                return Some((F::INFINITY, count));
997            }
998        }
999    }
1000    if let Some(inf_string) = options.inf_string() {
1001        if length >= inf_string.len() {
1002            let count = is_special_eq::<FORMAT>(byte.clone(), inf_string);
1003            if count != 0 {
1004                return Some((F::INFINITY, count));
1005            }
1006        }
1007    }
1008
1009    None
1010}
1011
1012/// Parse a partial representation of a special, non-finite float.
1013#[must_use]
1014#[inline(always)]
1015pub fn parse_partial_special<F, const FORMAT: u128>(
1016    byte: Bytes<FORMAT>,
1017    is_negative: bool,
1018    options: &Options,
1019) -> Option<(F, usize)>
1020where
1021    F: LemireFloat,
1022{
1023    let (mut float, count) = parse_positive_special::<F, FORMAT>(byte, options)?;
1024    if is_negative {
1025        float = -float;
1026    }
1027    Some((float, count))
1028}
1029
1030/// Try to parse a special, non-finite float.
1031#[must_use]
1032#[inline(always)]
1033pub fn parse_special<F, const FORMAT: u128>(
1034    byte: Bytes<FORMAT>,
1035    is_negative: bool,
1036    options: &Options,
1037) -> Option<F>
1038where
1039    F: LemireFloat,
1040{
1041    let length = byte.buffer_length();
1042    if let Some((float, count)) = parse_partial_special::<F, FORMAT>(byte, is_negative, options) {
1043        if count == length {
1044            return Some(float);
1045        }
1046    }
1047    None
1048}
lexical_parse_float/parse.rs

lexical_parse_float/
parse.rs