lexical_write_integer/
digit_count.rs

1//! Get the number of digits for an integer formatted for a radix.
2//!
3//! This will always accurately calculate the number of digits for
4//! a given radix, using optimizations for cases with a power-of-two
5//! and decimal numbers.
6
7#![cfg(not(feature = "compact"))]
8#![doc(hidden)]
9
10use lexical_util::{
11    assert::debug_assert_radix,
12    div128::u128_divrem,
13    num::{AsPrimitive, UnsignedInteger},
14    step::u64_step,
15};
16
17use crate::decimal::DecimalCount;
18
19/// Fast integral log2.
20///
21/// This is fairly trivial to explain, since the log2 is related to the
22/// number of bits in the value. Therefore, it has to be related to
23/// `T::BITS - ctlz(x)`. For example, `log2(2) == 1`, and `log2(1) == 0`,
24/// and `log2(3) == 1`. Therefore, we must take the log of an odd number,
25/// and subtract one.
26///
27/// This algorithm is described in detail in "Computing the number of digits
28/// of an integer quickly", available
29/// [here](https://lemire.me/blog/2021/05/28/computing-the-number-of-digits-of-an-integer-quickly/).
30#[inline(always)]
31pub fn fast_log2<T: UnsignedInteger>(x: T) -> usize {
32    T::BITS - 1 - (x | T::ONE).leading_zeros() as usize
33}
34
35// Algorithms to calculate the number of digits from a single value.
36macro_rules! digit_count {
37    // Highly-optimized digit count for 2^N values.
38    (@2 $x:expr) => {{
39        digit_log2($x)
40    }};
41
42    (@4 $x:expr) => {{
43        digit_log4($x)
44    }};
45
46    (@8 $x:expr) => {{
47        digit_log8($x)
48    }};
49
50    (@16 $x:expr) => {{
51        digit_log16($x)
52    }};
53
54    (@32 $x:expr) => {{
55        digit_log32($x)
56    }};
57
58    // Uses a naive approach to calculate the number of digits.
59    // This uses multi-digit optimizations when possible, and always
60    // accurately calculates the number of digits similar to how
61    // the digit generation algorithm works, just without the table
62    // lookups.
63    //
64    // There's no good way to do this, since float logn functions are
65    // lossy and the value might not be exactly represented in the type
66    // (that is, `>= 2^53`), in which case `log(b^x, b)`, `log(b^x + 1, b)`,
67    // and `log(b^x - 1, b)` would all be the same. Rust's integral [`ilog`]
68    // functions just use naive 1-digit at a time multiplication, so it's
69    // less efficient than our optimized variant.
70    //
71    // [`ilog`]: https://github.com/rust-lang/rust/blob/0e98766/library/core/src/num/uint_macros.rs#L1290-L1320
72    (@naive $t:ty, $radix:expr, $x:expr) => {{
73        // If we can do multi-digit optimizations, it's ideal,
74        // so we want to check if our type size max value is >=
75        // to the value.
76        let radix = $radix as u32;
77        let radix2 = radix * radix;
78        let radix4 = radix2 * radix2;
79
80        // NOTE: For radix 10, 0-9 would be 1 digit while 10-99 would be 2 digits,
81        // so this needs to be `>=` and not `>`.
82        let mut digits = 1;
83        let mut value = $x;
84
85        // try 4-digit optimizations
86        if <$t>::BITS >= 32 || radix4 < <$t>::MAX.as_u32() {
87            let radix4 = <$t as AsPrimitive>::from_u32(radix4);
88            while value >= radix4 {
89                digits += 4;
90                value /= radix4;
91            }
92        }
93
94        // try 2-digit optimizations
95        if <$t>::BITS >= 16 || radix2 < <$t>::MAX.as_u32() {
96            let radix2 = <$t as AsPrimitive>::from_u32(radix2);
97            while value >= radix2 {
98                digits += 2;
99                value /= radix2;
100            }
101        }
102
103        // can only do a single digit
104        let radix = <$t as AsPrimitive>::from_u32(radix);
105        while value >= radix {
106            digits += 1;
107            value /= radix;
108        }
109        digits
110    }};
111}
112
113/// Highly-optimized digit count for base2 values.
114///
115/// This is always the number of `BITS - ctlz(x | 1)`, so it's
116/// `fast_log2(x) + 1`. This is because 0 has 1 digit, as does 1,
117/// but 2 and 3 have 2, etc.
118#[inline(always)]
119fn digit_log2<T: UnsignedInteger>(x: T) -> usize {
120    fast_log2(x) + 1
121}
122
123/// Highly-optimized digit count for base4 values.
124///
125/// This is very similar to base 2, except we divide by 2
126/// and adjust by 1. For example, `fast_log2(3) == 1`, so
127/// `fast_log2(3) / 2 == 0`, which then gives us our result.
128///
129/// This works because `log2(x) / 2 == log4(x)`. Flooring is
130/// the correct approach since `log2(15) == 3`, which should be
131/// 2 digits (so `3 / 2 + 1`).
132#[inline(always)]
133fn digit_log4<T: UnsignedInteger>(x: T) -> usize {
134    (fast_log2(x) / 2) + 1
135}
136
137/// Highly-optimized digit count for base8 values.
138///
139/// This works because `log2(x) / 3 == log8(x)`. Flooring is
140/// the correct approach since `log2(63) == 5`, which should be
141/// 2 digits (so `5 / 3 + 1`).
142#[inline(always)]
143fn digit_log8<T: UnsignedInteger>(x: T) -> usize {
144    (fast_log2(x) / 3) + 1
145}
146
147/// Highly-optimized digit count for base16 values.
148///
149/// This works because `log2(x) / 4 == log16(x)`. Flooring is
150/// the correct approach since `log2(255) == 7`, which should be
151/// 2 digits (so `7 / 4 + 1`).
152#[inline(always)]
153fn digit_log16<T: UnsignedInteger>(x: T) -> usize {
154    (fast_log2(x) / 4) + 1
155}
156
157/// Highly-optimized digit count for base32 values.
158///
159/// This works because `log2(x) / 5 == log32(x)`. Flooring is
160/// the correct approach since `log2(1023) == 9`, which should be
161/// 2 digits (so `9 / 5 + 1`).
162#[inline(always)]
163fn digit_log32<T: UnsignedInteger>(x: T) -> usize {
164    (fast_log2(x) / 5) + 1
165}
166
167/// Quickly calculate the number of digits in a type.
168///
169/// This uses optimizations for powers-of-two and decimal
170/// numbers, which can correctly calculate the number of
171/// values without requiring logs or other expensive
172/// calculations.
173///
174/// # Safety
175///
176/// Safe as long as `digit_count` returns at least the number of
177/// digits that would be written by the integer. If the value is
178/// too small, then the buffer might underflow, causing out-of-bounds
179/// read/writes.
180pub unsafe trait DigitCount: UnsignedInteger + DecimalCount {
181    /// Get the number of digits in a value.
182    #[inline(always)]
183    fn digit_count(self, radix: u32) -> usize {
184        assert!((2..=36).contains(&radix), "radix must be >= 2 and <= 36");
185        match radix {
186            // decimal
187            10 => self.decimal_count(),
188            // 2^N
189            2 => digit_count!(@2 self),
190            4 => digit_count!(@4 self),
191            8 => digit_count!(@8 self),
192            16 => digit_count!(@16 self),
193            32 => digit_count!(@32 self),
194            // fallback
195            _ => digit_count!(@naive Self, radix, self),
196        }
197    }
198
199    /// Get the number of digits in a value, always using the slow algorithm.
200    ///
201    /// This is exposed for testing purposes.
202    #[inline(always)]
203    fn slow_digit_count(self, radix: u32) -> usize {
204        digit_count!(@naive Self, radix, self)
205    }
206}
207
208// Implement digit counts for all types.
209macro_rules! digit_impl {
210    ($($t:ty)*) => ($(
211        // SAFETY: Safe since it uses the default implementation.
212        unsafe impl DigitCount for $t {
213        }
214    )*);
215}
216
217digit_impl! { u8 u16 usize u32 u64 }
218
219// SAFETY: Safe since it specialized in terms of `div128_rem`, which
220// is the same way the digits are generated.
221unsafe impl DigitCount for u128 {
222    /// Get the number of digits in a value.
223    #[inline(always)]
224    fn digit_count(self, radix: u32) -> usize {
225        debug_assert_radix(radix);
226        match radix {
227            // decimal
228            10 => self.decimal_count(),
229            // 2^N
230            2 => digit_count!(@2 self),
231            4 => digit_count!(@4 self),
232            8 => digit_count!(@8 self),
233            16 => digit_count!(@16 self),
234            32 => digit_count!(@32 self),
235            // fallback
236            _ => {
237                // NOTE: This follows the same implementation as the digit count
238                // generation, so this is safe.
239                if self <= u64::MAX as u128 {
240                    return digit_count!(@naive u64, radix, self as u64);
241                }
242
243                // Doesn't fit in 64 bits, let's try our divmod.
244                let step = u64_step(radix);
245                let (value, _) = u128_divrem(self, radix);
246                let mut count = step;
247                if value <= u64::MAX as u128 {
248                    count += digit_count!(@naive u64, radix, value as u64);
249                } else {
250                    // Value has to be greater than 1.8e38
251                    let (value, _) = u128_divrem(value, radix);
252                    count += step;
253                    if value != 0 {
254                        count += digit_count!(@naive u64, radix, value as u64);
255                    }
256                }
257
258                count
259            },
260        }
261    }
262}