lexical_write_integer/
decimal.rs

1//! Radix-generic, lexical integer-to-string conversion routines.
2//!
3//! An optimization for decimal is pre-computing the number of digits written
4//! prior to actually writing digits, avoiding the use of temporary buffers.
5//! This scales well with integer size, short of `u128`, due to the slower
6//! division algorithms required.
7//!
8//! See [`Algorithm.md`] for a more detailed description of the algorithm
9//! choice here.
10//!
11//! [`Algorithm.md`]: https://github.com/Alexhuszagh/rust-lexical/blob/main/lexical-write-integer/docs/Algorithm.md
12
13#![cfg(not(feature = "compact"))]
14#![doc(hidden)]
15
16use lexical_util::num::UnsignedInteger;
17
18use crate::digit_count::fast_log2;
19use crate::jeaiii;
20
21/// Calculate the fast, integral log10 of a value.
22///
23/// This is relatively easy to explain as well: we calculate the log2
24/// of the value, then multiply by an integral constant for the log10(2).
25///
26/// Note that this value is frequently off by 1, so we need to round-up
27/// accordingly. This magic number is valid at least up until `1<<18`,
28/// which works for all values, since our max log2 is 127.
29#[inline(always)]
30pub fn fast_log10<T: UnsignedInteger>(x: T) -> usize {
31    let log2 = fast_log2(x);
32    (log2 * 1233) >> 12
33}
34
35/// Fast algorithm to calculate the number of digits in an integer.
36///
37/// We only use this for 32-bit or smaller values: for larger numbers,
38/// we first write digits until we get to 32-bits, then we call this.
39///
40/// The values are as follows:
41///
42/// - `2^32 for j = 1`
43/// - `⌈log10(2^j)⌉ * 2^128 + 2^128 – 10^(⌈log10(2j)⌉) for j from 2 to 30`
44/// - `⌈log10(2^j)⌉ for j = 31 and j = 32`
45///
46/// This algorithm is described in detail in "Computing the number of digits
47/// of an integer even faster", available
48/// [here](https://lemire.me/blog/2021/06/03/computing-the-number-of-digits-of-an-integer-even-faster/).
49#[inline(always)]
50pub fn fast_digit_count(x: u32) -> usize {
51    const TABLE: [u64; 32] = [
52        4294967296,
53        8589934582,
54        8589934582,
55        8589934582,
56        12884901788,
57        12884901788,
58        12884901788,
59        17179868184,
60        17179868184,
61        17179868184,
62        21474826480,
63        21474826480,
64        21474826480,
65        21474826480,
66        25769703776,
67        25769703776,
68        25769703776,
69        30063771072,
70        30063771072,
71        30063771072,
72        34349738368,
73        34349738368,
74        34349738368,
75        34349738368,
76        38554705664,
77        38554705664,
78        38554705664,
79        41949672960,
80        41949672960,
81        41949672960,
82        42949672960,
83        42949672960,
84    ];
85    // This always safe, since `fast_log2` will always return a value
86    // <= 32. This is because the range of values from `ctlz(x | 1)` is
87    // `[0, 31]`, so `32 - 1 - ctlz(x | 1)` must be in the range `[0, 31]`.
88    let shift = TABLE[fast_log2(x)];
89    let count = (x as u64 + shift) >> 32;
90    count as usize
91}
92
93/// Slightly slower algorithm to calculate the number of digits in an integer.
94///
95/// This uses no static storage, and uses a fast log10(2) estimation
96/// to calculate the number of digits, from the log2 value.
97///
98/// This algorithm is described in detail in "Computing the number of digits
99/// of an integer even faster", available
100/// [here](https://lemire.me/blog/2021/06/03/computing-the-number-of-digits-of-an-integer-even-faster/).
101#[inline(always)]
102pub fn fallback_digit_count<T: UnsignedInteger>(x: T, table: &[T]) -> usize {
103    // This value is always within 1: calculate if we need to round-up
104    // based on a pre-computed table.
105    let log10 = fast_log10(x);
106    let shift_up = table.get(log10).map_or(false, |&y| x >= y);
107
108    log10 + shift_up as usize + 1
109}
110
111/// Quickly calculate the number of decimal digits in a type.
112///
113/// # Safety
114///
115/// Safe as long as `digit_count` returns at least the number of
116/// digits that would be written by the integer. If the value is
117/// too small, then the buffer might underflow, causing out-of-bounds
118/// read/writes.
119pub unsafe trait DecimalCount: UnsignedInteger {
120    /// Get the number of digits in a value.
121    fn decimal_count(self) -> usize;
122}
123
124// SAFETY: Safe since `fast_digit_count` is always correct for `<= u32::MAX`.
125unsafe impl DecimalCount for u8 {
126    #[inline(always)]
127    fn decimal_count(self) -> usize {
128        fast_digit_count(self as u32)
129    }
130}
131
132// SAFETY: Safe since `fast_digit_count` is always correct for `<= u32::MAX`.
133unsafe impl DecimalCount for u16 {
134    #[inline(always)]
135    fn decimal_count(self) -> usize {
136        fast_digit_count(self as u32)
137    }
138}
139
140// SAFETY: Safe since `fast_digit_count` is always correct for `<= u32::MAX`.
141unsafe impl DecimalCount for u32 {
142    #[inline(always)]
143    fn decimal_count(self) -> usize {
144        fast_digit_count(self)
145    }
146}
147
148// SAFETY: Safe since `fallback_digit_count` is valid for the current table,
149// as described in <https://lemire.me/blog/2021/06/03/computing-the-number-of-digits-of-an-integer-even-faster/>
150unsafe impl DecimalCount for u64 {
151    #[inline(always)]
152    fn decimal_count(self) -> usize {
153        const TABLE: [u64; 19] = [
154            10,
155            100,
156            1000,
157            10000,
158            100000,
159            1000000,
160            10000000,
161            100000000,
162            1000000000,
163            10000000000,
164            100000000000,
165            1000000000000,
166            10000000000000,
167            100000000000000,
168            1000000000000000,
169            10000000000000000,
170            100000000000000000,
171            1000000000000000000,
172            10000000000000000000,
173        ];
174        fallback_digit_count(self, &TABLE)
175    }
176}
177
178// SAFETY: Safe since `fallback_digit_count` is valid for the current table,
179// as described in <https://lemire.me/blog/2021/06/03/computing-the-number-of-digits-of-an-integer-even-faster/>
180unsafe impl DecimalCount for u128 {
181    #[inline(always)]
182    fn decimal_count(self) -> usize {
183        const TABLE: [u128; 38] = [
184            10,
185            100,
186            1000,
187            10000,
188            100000,
189            1000000,
190            10000000,
191            100000000,
192            1000000000,
193            10000000000,
194            100000000000,
195            1000000000000,
196            10000000000000,
197            100000000000000,
198            1000000000000000,
199            10000000000000000,
200            100000000000000000,
201            1000000000000000000,
202            10000000000000000000,
203            100000000000000000000,
204            1000000000000000000000,
205            10000000000000000000000,
206            100000000000000000000000,
207            1000000000000000000000000,
208            10000000000000000000000000,
209            100000000000000000000000000,
210            1000000000000000000000000000,
211            10000000000000000000000000000,
212            100000000000000000000000000000,
213            1000000000000000000000000000000,
214            10000000000000000000000000000000,
215            100000000000000000000000000000000,
216            1000000000000000000000000000000000,
217            10000000000000000000000000000000000,
218            100000000000000000000000000000000000,
219            1000000000000000000000000000000000000,
220            10000000000000000000000000000000000000,
221            100000000000000000000000000000000000000,
222        ];
223        fallback_digit_count(self, &TABLE)
224    }
225}
226
227// SAFETY: Safe since it uses the default implementation for the type size.
228unsafe impl DecimalCount for usize {
229    #[inline(always)]
230    fn decimal_count(self) -> usize {
231        match Self::BITS {
232            8 | 16 | 32 => (self as u32).decimal_count(),
233            64 => (self as u64).decimal_count(),
234            128 => (self as u128).decimal_count(),
235            _ => unimplemented!(),
236        }
237    }
238}
239
240/// Write integer to decimal string.
241pub trait Decimal: DecimalCount {
242    fn decimal(self, buffer: &mut [u8]) -> usize;
243
244    /// Specialized overload is the type is sized.
245    ///
246    /// # Panics
247    ///
248    /// If the data original provided was unsigned and therefore
249    /// has more digits than the signed variant. This only affects
250    /// `i64` (see #191).
251    #[inline(always)]
252    fn decimal_signed(self, buffer: &mut [u8]) -> usize {
253        self.decimal(buffer)
254    }
255}
256
257// Implement decimal for type.
258macro_rules! decimal_impl {
259    ($($t:ty; $f:ident)*) => ($(
260        impl Decimal for $t {
261            #[inline(always)]
262            fn decimal(self, buffer: &mut [u8]) -> usize {
263                jeaiii::$f(self, buffer)
264            }
265        }
266    )*);
267}
268
269decimal_impl! {
270    u8; from_u8
271    u16; from_u16
272    u32; from_u32
273    u128; from_u128
274}
275
276impl Decimal for u64 {
277    #[inline(always)]
278    fn decimal(self, buffer: &mut [u8]) -> usize {
279        jeaiii::from_u64(self, buffer)
280    }
281
282    #[inline(always)]
283    fn decimal_signed(self, buffer: &mut [u8]) -> usize {
284        jeaiii::from_i64(self, buffer)
285    }
286}
287
288impl Decimal for usize {
289    #[inline(always)]
290    fn decimal(self, buffer: &mut [u8]) -> usize {
291        match usize::BITS {
292            8 => (self as u8).decimal(buffer),
293            16 => (self as u16).decimal(buffer),
294            32 => (self as u32).decimal(buffer),
295            64 => (self as u64).decimal(buffer),
296            128 => (self as u128).decimal(buffer),
297            _ => unimplemented!(),
298        }
299    }
300
301    #[inline(always)]
302    fn decimal_signed(self, buffer: &mut [u8]) -> usize {
303        match usize::BITS {
304            8 => (self as u8).decimal_signed(buffer),
305            16 => (self as u16).decimal_signed(buffer),
306            32 => (self as u32).decimal_signed(buffer),
307            64 => (self as u64).decimal_signed(buffer),
308            128 => (self as u128).decimal_signed(buffer),
309            _ => unimplemented!(),
310        }
311    }
312}