serde_json/lexical/math.rs
// Adapted from https://github.com/Alexhuszagh/rust-lexical.

//! Building-blocks for arbitrary-precision math.
//!
//! These algorithms assume little-endian order for the large integer
//! buffers, so for a `vec![0, 1, 2, 3]`, `3` is the most significant limb,
//! and `0` is the least significant limb.
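//!
//! As an illustrative sketch (not part of the original crate docs): with
//! 32-bit limbs, the buffer `[lo, hi]` represents the value `lo + hi * 2^32`,
//! so `[0x0000_0002, 0x0000_0001]` encodes `0x1_0000_0002`.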

use super::large_powers;
use super::num::*;
use super::small_powers::*;
use alloc::vec::Vec;
use core::{cmp, iter, mem};

// ALIASES
// -------

// Type for a single limb of the big integer.
//
// A limb is analogous to a digit in base10, except that it stores 32-bit
// or 64-bit numbers instead.
//
// This should cover all known 64-bit platforms supported by Rust.
// https://forge.rust-lang.org/platform-support.html
//
// Platforms where native 128-bit multiplication is explicitly supported:
// - x86_64 (Supported via `MUL`).
// - mips64 (Supported via `DMULTU`, from which `HI` and `LO` can be read).
//
// Platforms where native 64-bit multiplication is supported and
// you can extract hi-lo for 64-bit multiplications:
// - aarch64 (Requires `UMULH` and `MUL` to capture high and low bits).
// - powerpc64 (Requires `MULHDU` and `MULLD` to capture high and low bits).
//
// Platforms where native 128-bit multiplication is not supported,
// requiring software emulation:
// - sparc64 (`UMUL` only supports double-word arguments).
// 32-BIT LIMB
#[cfg(fast_arithmetic = "32")]
pub type Limb = u32;

#[cfg(fast_arithmetic = "32")]
pub const POW5_LIMB: &[Limb] = &POW5_32;

#[cfg(fast_arithmetic = "32")]
pub const POW10_LIMB: &[Limb] = &POW10_32;

#[cfg(fast_arithmetic = "32")]
type Wide = u64;

// 64-BIT LIMB
#[cfg(fast_arithmetic = "64")]
pub type Limb = u64;

#[cfg(fast_arithmetic = "64")]
pub const POW5_LIMB: &[Limb] = &POW5_64;

#[cfg(fast_arithmetic = "64")]
pub const POW10_LIMB: &[Limb] = &POW10_64;

#[cfg(fast_arithmetic = "64")]
type Wide = u128;

/// Cast to limb type.
#[inline]
pub(crate) fn as_limb<T: Integer>(t: T) -> Limb {
    Limb::as_cast(t)
}

/// Cast to wide type.
#[inline]
fn as_wide<T: Integer>(t: T) -> Wide {
    Wide::as_cast(t)
}

// SPLIT
// -----

/// Split u64 into limbs, in little-endian order.
#[inline]
#[cfg(fast_arithmetic = "32")]
fn split_u64(x: u64) -> [Limb; 2] {
    [as_limb(x), as_limb(x >> 32)]
}

/// Split u64 into limbs, in little-endian order.
#[inline]
#[cfg(fast_arithmetic = "64")]
fn split_u64(x: u64) -> [Limb; 1] {
    [as_limb(x)]
}

// HI64
// ----

// NONZERO

/// Check if any of the remaining bits are non-zero.
#[inline]
pub fn nonzero<T: Integer>(x: &[T], rindex: usize) -> bool {
    let len = x.len();
    let slc = &x[..len - rindex];
    slc.iter().rev().any(|&x| x != T::ZERO)
}

/// Shift 64-bit integer to high 64-bits.
#[inline]
fn u64_to_hi64_1(r0: u64) -> (u64, bool) {
    debug_assert!(r0 != 0);
    let ls = r0.leading_zeros();
    (r0 << ls, false)
}

/// Shift 2 64-bit integers to high 64-bits.
#[inline]
fn u64_to_hi64_2(r0: u64, r1: u64) -> (u64, bool) {
    debug_assert!(r0 != 0);
    let ls = r0.leading_zeros();
    let rs = 64 - ls;
    let v = match ls {
        0 => r0,
        _ => (r0 << ls) | (r1 >> rs),
    };
    let n = r1 << ls != 0;
    (v, n)
}
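
// Illustrative example (not in the original source): `u64_to_hi64_2(2, 1)`
// returns `(1 << 63, true)`. The high word `2` is shifted left by its 62
// leading zeros so its most significant bit lands at bit 63, the vacated low
// bits are filled from `r1`, and the `true` flag records that non-zero bits
// of `r1` were truncated away.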

/// Trait to export the high 64-bits from a little-endian slice.
trait Hi64<T>: AsRef<[T]> {
    /// Get the hi64 bits from a 1-limb slice.
    fn hi64_1(&self) -> (u64, bool);

    /// Get the hi64 bits from a 2-limb slice.
    fn hi64_2(&self) -> (u64, bool);

    /// Get the hi64 bits from a 3-limb slice.
    fn hi64_3(&self) -> (u64, bool);

    /// High-level exporter to extract the high 64 bits from a little-endian slice.
    #[inline]
    fn hi64(&self) -> (u64, bool) {
        match self.as_ref().len() {
            0 => (0, false),
            1 => self.hi64_1(),
            2 => self.hi64_2(),
            _ => self.hi64_3(),
        }
    }
}

impl Hi64<u32> for [u32] {
    #[inline]
    fn hi64_1(&self) -> (u64, bool) {
        debug_assert!(self.len() == 1);
        let r0 = self[0] as u64;
        u64_to_hi64_1(r0)
    }

    #[inline]
    fn hi64_2(&self) -> (u64, bool) {
        debug_assert!(self.len() == 2);
        let r0 = (self[1] as u64) << 32;
        let r1 = self[0] as u64;
        u64_to_hi64_1(r0 | r1)
    }

    #[inline]
    fn hi64_3(&self) -> (u64, bool) {
        debug_assert!(self.len() >= 3);
        let r0 = self[self.len() - 1] as u64;
        let r1 = (self[self.len() - 2] as u64) << 32;
        let r2 = self[self.len() - 3] as u64;
        let (v, n) = u64_to_hi64_2(r0, r1 | r2);
        (v, n || nonzero(self, 3))
    }
}

impl Hi64<u64> for [u64] {
    #[inline]
    fn hi64_1(&self) -> (u64, bool) {
        debug_assert!(self.len() == 1);
        let r0 = self[0];
        u64_to_hi64_1(r0)
    }

    #[inline]
    fn hi64_2(&self) -> (u64, bool) {
        debug_assert!(self.len() >= 2);
        let r0 = self[self.len() - 1];
        let r1 = self[self.len() - 2];
        let (v, n) = u64_to_hi64_2(r0, r1);
        (v, n || nonzero(self, 2))
    }

    #[inline]
    fn hi64_3(&self) -> (u64, bool) {
        self.hi64_2()
    }
}

// SCALAR
// ------

// Scalar-to-scalar operations, the building blocks for arbitrary-precision
// operations.

mod scalar {
    use super::*;

    // ADDITION

    /// Add two small integers and return the resulting value and whether overflow happened.
    #[inline]
    pub fn add(x: Limb, y: Limb) -> (Limb, bool) {
        x.overflowing_add(y)
    }

    /// AddAssign two small integers and return whether overflow happened.
    #[inline]
    pub fn iadd(x: &mut Limb, y: Limb) -> bool {
        let t = add(*x, y);
        *x = t.0;
        t.1
    }

    // SUBTRACTION

    /// Subtract two small integers and return the resulting value and whether overflow happened.
    #[inline]
    pub fn sub(x: Limb, y: Limb) -> (Limb, bool) {
        x.overflowing_sub(y)
    }

    /// SubAssign two small integers and return whether overflow happened.
    #[inline]
    pub fn isub(x: &mut Limb, y: Limb) -> bool {
        let t = sub(*x, y);
        *x = t.0;
        t.1
    }

    // MULTIPLICATION

    /// Multiply two small integers (with carry) and return the overflow contribution.
    ///
    /// Returns the (low, high) components.
    #[inline]
    pub fn mul(x: Limb, y: Limb, carry: Limb) -> (Limb, Limb) {
        // Cannot overflow, as long as wide is 2x as wide. This is because
        // the following is always true:
        // `Wide::max_value() - (Narrow::max_value() * Narrow::max_value()) >= Narrow::max_value()`
        let z: Wide = as_wide(x) * as_wide(y) + as_wide(carry);
        let bits = mem::size_of::<Limb>() * 8;
        (as_limb(z), as_limb(z >> bits))
    }
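
    // Illustrative example (not in the original source): with 32-bit limbs,
    // `mul(0xFFFF_FFFF, 0xFFFF_FFFF, 0)` returns `(0x0000_0001, 0xFFFF_FFFE)`,
    // since 0xFFFF_FFFF * 0xFFFF_FFFF == 0xFFFF_FFFE_0000_0001.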

    /// Multiply two small integers (with carry) and return the resulting carry.
    #[inline]
    pub fn imul(x: &mut Limb, y: Limb, carry: Limb) -> Limb {
        let t = mul(*x, y, carry);
        *x = t.0;
        t.1
    }
} // scalar

// SMALL
// -----

// Large-to-small operations, to modify a big integer from a native scalar.

mod small {
    use super::*;

    // ADDITION

    /// Implied AddAssign implementation for adding a small integer to bigint.
    ///
    /// Allows us to choose a start index in `x` so we can increment
    /// from a non-zero starting limb.
    #[inline]
    pub fn iadd_impl(x: &mut Vec<Limb>, y: Limb, xstart: usize) {
        if x.len() <= xstart {
            x.push(y);
        } else {
            // Initial add
            let mut carry = scalar::iadd(&mut x[xstart], y);

            // Increment until overflow stops occurring.
            let mut size = xstart + 1;
            while carry && size < x.len() {
                carry = scalar::iadd(&mut x[size], 1);
                size += 1;
            }

            // If we overflowed the buffer entirely, need to add 1 to the end
            // of the buffer.
            if carry {
                x.push(1);
            }
        }
    }
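
    // Illustrative example (not in the original source): starting from
    // `x = vec![Limb::max_value(), 0]`, `iadd_impl(&mut x, 1, 0)` wraps the
    // first limb to 0 and propagates the carry, leaving `[0, 1]`. If every
    // limb overflows, a new most-significant limb of 1 is pushed instead.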

    /// AddAssign small integer to bigint.
    #[inline]
    pub fn iadd(x: &mut Vec<Limb>, y: Limb) {
        iadd_impl(x, y, 0);
    }

    // SUBTRACTION

    /// SubAssign small integer to bigint.
    /// Does not do overflowing subtraction.
    #[inline]
    pub fn isub_impl(x: &mut Vec<Limb>, y: Limb, xstart: usize) {
        debug_assert!(x.len() > xstart && (x[xstart] >= y || x.len() > xstart + 1));

        // Initial subtraction
        let mut carry = scalar::isub(&mut x[xstart], y);

        // Increment until overflow stops occurring.
        let mut size = xstart + 1;
        while carry && size < x.len() {
            carry = scalar::isub(&mut x[size], 1);
            size += 1;
        }
        normalize(x);
    }

    // MULTIPLICATION

    /// MulAssign small integer to bigint.
    #[inline]
    pub fn imul(x: &mut Vec<Limb>, y: Limb) {
        // Multiply iteratively over all elements, adding the carry each time.
        let mut carry: Limb = 0;
        for xi in &mut *x {
            carry = scalar::imul(xi, y, carry);
        }

        // Overflow of value, add to end.
        if carry != 0 {
            x.push(carry);
        }
    }

    /// Mul small integer to bigint.
    #[inline]
    pub fn mul(x: &[Limb], y: Limb) -> Vec<Limb> {
        let mut z = Vec::<Limb>::default();
        z.extend_from_slice(x);
        imul(&mut z, y);
        z
    }

    /// MulAssign by a power.
    ///
    /// Theoretically...
    ///
    /// Use an exponentiation by squaring method, since it reduces the time
    /// complexity of the multiplication to ~`O(log(n))` for the squaring,
    /// and `O(n*m)` for the result. Since `m` is typically a lower-order
    /// factor, this significantly reduces the number of multiplications
    /// we need to do. Iteratively multiplying by small powers follows
    /// the nth triangular number series, which scales as `O(p^2)`, but
    /// where `p` is `n+m`. In short, it scales very poorly.
    ///
    /// Practically....
    ///
    /// Exponentiation by Squaring:
    ///     running 2 tests
    ///     test bigcomp_f32_lexical ... bench: 1,018 ns/iter (+/- 78)
    ///     test bigcomp_f64_lexical ... bench: 3,639 ns/iter (+/- 1,007)
    ///
    /// Exponentiation by Iterative Small Powers:
    ///     running 2 tests
    ///     test bigcomp_f32_lexical ... bench: 518 ns/iter (+/- 31)
    ///     test bigcomp_f64_lexical ... bench: 583 ns/iter (+/- 47)
    ///
    /// Exponentiation by Iterative Large Powers (of 2):
    ///     running 2 tests
    ///     test bigcomp_f32_lexical ... bench: 671 ns/iter (+/- 31)
    ///     test bigcomp_f64_lexical ... bench: 1,394 ns/iter (+/- 47)
    ///
    /// Even using worst-case scenarios, exponentiation by squaring is
    /// significantly slower for our workloads. Just multiply by small powers,
    /// in simple cases, and use precalculated large powers in other cases.
    pub fn imul_pow5(x: &mut Vec<Limb>, n: u32) {
        use super::large::KARATSUBA_CUTOFF;

        let small_powers = POW5_LIMB;
        let large_powers = large_powers::POW5;

        if n == 0 {
            // No exponent, just return.
            // The 0-index of the large powers is `2^0`, which is 1, so we want
            // to make sure we don't take that path with a literal 0.
            return;
        }

        // We want to use the asymptotically faster algorithm (multiplying by
        // precomputed large powers) if we're going to hit Karatsuba
        // multiplication somewhere along the way; otherwise, just iteratively
        // multiply by small powers.
        let bit_length = 32 - n.leading_zeros() as usize;
        debug_assert!(bit_length != 0 && bit_length <= large_powers.len());
        if x.len() + large_powers[bit_length - 1].len() < 2 * KARATSUBA_CUTOFF {
            // We can use iterative small powers to make this faster for the
            // easy cases.

            // Multiply by the largest small power until n < step.
            let step = small_powers.len() - 1;
            let power = small_powers[step];
            let mut n = n as usize;
            while n >= step {
                imul(x, power);
                n -= step;
            }

            // Multiply by the remainder.
            imul(x, small_powers[n]);
        } else {
            // In theory, this code should be asymptotically a lot faster;
            // in practice, our small::imul seems to be the limiting step,
            // and large imul is slow as well.

            // Multiply by higher order powers.
            let mut idx: usize = 0;
            let mut bit: usize = 1;
            let mut n = n as usize;
            while n != 0 {
                if n & bit != 0 {
                    debug_assert!(idx < large_powers.len());
                    large::imul(x, large_powers[idx]);
                    n ^= bit;
                }
                idx += 1;
                bit <<= 1;
            }
        }
    }
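
    // Illustrative example (not in the original source): starting from
    // `x = vec![1]`, `imul_pow5(&mut x, 3)` multiplies by 5^3 and leaves
    // `x = [125]`; small exponents like this go through the iterative
    // small-powers branch.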

    // BIT LENGTH

    /// Get number of leading zero bits in the storage.
    #[inline]
    pub fn leading_zeros(x: &[Limb]) -> usize {
        x.last().map_or(0, |x| x.leading_zeros() as usize)
    }

    /// Calculate the bit-length of the big-integer.
    #[inline]
    pub fn bit_length(x: &[Limb]) -> usize {
        let bits = mem::size_of::<Limb>() * 8;
        // Avoid overflowing, calculate via total number of bits
        // minus leading zero bits.
        let nlz = leading_zeros(x);
        bits.checked_mul(x.len())
            .map_or_else(usize::max_value, |v| v - nlz)
    }
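
    // Illustrative example (not in the original source): `bit_length(&[0b101])`
    // is 3, since the single limb contributes `Limb::BITS` bits minus its
    // leading zeros.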

    // SHL

    /// Shift-left bits inside a buffer.
    ///
    /// Assumes `n < Limb::BITS`, i.e., internally shifting bits.
    #[inline]
    pub fn ishl_bits(x: &mut Vec<Limb>, n: usize) {
        // Need to shift by the number of `bits % Limb::BITS`.
        let bits = mem::size_of::<Limb>() * 8;
        debug_assert!(n < bits);
        if n == 0 {
            return;
        }

        // Internally, for each item, we shift left by n, and add the previous
        // right shifted limb-bits.
        // For example, we transform (for u8) shifted left 2, to:
        //      b10100100 b01000010
        //      b10 b10010001 b00001000
        let rshift = bits - n;
        let lshift = n;
        let mut prev: Limb = 0;
        for xi in &mut *x {
            let tmp = *xi;
            *xi <<= lshift;
            *xi |= prev >> rshift;
            prev = tmp;
        }

        // Always push the carry, even if it creates a non-normal result.
        let carry = prev >> rshift;
        if carry != 0 {
            x.push(carry);
        }
    }
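
    // Illustrative example (not in the original source): with 32-bit limbs,
    // `ishl_bits(&mut vec![0x8000_0000], 1)` produces `[0, 1]`; the high bit
    // shifted out of the first limb is pushed as a new carry limb.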

    /// Shift-left `n` limbs inside a buffer.
    ///
    /// Assumes `n` is not 0.
    #[inline]
    pub fn ishl_limbs(x: &mut Vec<Limb>, n: usize) {
        debug_assert!(n != 0);
        if !x.is_empty() {
            x.reserve(n);
            x.splice(..0, iter::repeat(0).take(n));
        }
    }

    /// Shift-left buffer by n bits.
    #[inline]
    pub fn ishl(x: &mut Vec<Limb>, n: usize) {
        let bits = mem::size_of::<Limb>() * 8;
        // Need to pad with zeros for `n / Limb::BITS` limbs, and shift-left
        // with carry for the remaining `n % Limb::BITS` bits.
        let rem = n % bits;
        let div = n / bits;
        ishl_bits(x, rem);
        if div != 0 {
            ishl_limbs(x, div);
        }
    }

    // NORMALIZE

    /// Normalize the container by popping any leading zeros.
    #[inline]
    pub fn normalize(x: &mut Vec<Limb>) {
        // Remove leading zeros so the representation stays normalized
        // for comparisons and bit-length calculations.
        while x.last() == Some(&0) {
            x.pop();
        }
    }
} // small

// LARGE
// -----

// Large-to-large operations, to modify a big integer from another big integer.

mod large {
    use super::*;

    // RELATIVE OPERATORS

    /// Compare `x` to `y`, in little-endian order.
    #[inline]
    pub fn compare(x: &[Limb], y: &[Limb]) -> cmp::Ordering {
        if x.len() > y.len() {
            cmp::Ordering::Greater
        } else if x.len() < y.len() {
            cmp::Ordering::Less
        } else {
            let iter = x.iter().rev().zip(y.iter().rev());
            for (&xi, &yi) in iter {
                if xi > yi {
                    return cmp::Ordering::Greater;
                } else if xi < yi {
                    return cmp::Ordering::Less;
                }
            }
            // Equal case.
            cmp::Ordering::Equal
        }
    }

    /// Check if x is less than y.
    #[inline]
    pub fn less(x: &[Limb], y: &[Limb]) -> bool {
        compare(x, y) == cmp::Ordering::Less
    }

    /// Check if x is greater than or equal to y.
    #[inline]
    pub fn greater_equal(x: &[Limb], y: &[Limb]) -> bool {
        !less(x, y)
    }

    // ADDITION

    /// Implied AddAssign implementation for bigints.
    ///
    /// Allows us to choose a start index in `x` at which to store, so we can
    /// avoid padding the buffer with zeros when not needed, optimized for vectors.
    pub fn iadd_impl(x: &mut Vec<Limb>, y: &[Limb], xstart: usize) {
        // The effective x buffer is from `xstart..x.len()`, so we need to treat
        // that as the current range. If the effective y buffer is longer, we
        // need to resize x to that length plus the start index.
        if y.len() > x.len() - xstart {
            x.resize(y.len() + xstart, 0);
        }

        // Iteratively add elements from y to x.
        let mut carry = false;
        for (xi, yi) in x[xstart..].iter_mut().zip(y.iter()) {
            // Only one op of the two can overflow, since we added at max
            // Limb::max_value() + Limb::max_value(). Add the previous carry,
            // and store the current carry for the next.
            let mut tmp = scalar::iadd(xi, *yi);
            if carry {
                tmp |= scalar::iadd(xi, 1);
            }
            carry = tmp;
        }

        // Overflow from the last add.
        if carry {
            small::iadd_impl(x, 1, y.len() + xstart);
        }
    }

    /// AddAssign bigint to bigint.
    #[inline]
    pub fn iadd(x: &mut Vec<Limb>, y: &[Limb]) {
        iadd_impl(x, y, 0);
    }

    /// Add bigint to bigint.
    #[inline]
    pub fn add(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        let mut z = Vec::<Limb>::default();
        z.extend_from_slice(x);
        iadd(&mut z, y);
        z
    }

    // SUBTRACTION

    /// SubAssign bigint to bigint.
    pub fn isub(x: &mut Vec<Limb>, y: &[Limb]) {
        // Basic underflow checks.
        debug_assert!(greater_equal(x, y));

        // Iteratively subtract elements of y from x.
        let mut carry = false;
        for (xi, yi) in x.iter_mut().zip(y.iter()) {
            // Only one op of the two can underflow, since we subtract at most
            // Limb::max_value() + 1. Subtract the previous borrow, and store
            // the current borrow for the next.
            let mut tmp = scalar::isub(xi, *yi);
            if carry {
                tmp |= scalar::isub(xi, 1);
            }
            carry = tmp;
        }

        if carry {
            small::isub_impl(x, 1, y.len());
        } else {
            small::normalize(x);
        }
    }

    // MULTIPLICATION

    /// Number of limbs at which we bottom out to the asymptotically slower algorithms.
    ///
    /// Karatsuba tends to out-perform long-multiplication at ~320-640 bits,
    /// so we go halfway, while Newton division tends to out-perform
    /// Algorithm D at ~1024 bits. We can toggle this for optimal performance.
    pub const KARATSUBA_CUTOFF: usize = 32;

    /// Grade-school multiplication algorithm.
    ///
    /// Slow, naive algorithm using limb-sized digits and just shifting left for
    /// each iteration. This could be optimized with numerous other algorithms,
    /// but it's extremely simple, and works in `O(n*m)` time, which is fine
    /// for our purposes. Each of the `m` iterations requires `n` multiplications
    /// and `n` additions, as in grade-school multiplication.
    fn long_mul(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        // Using the immutable value, multiply by all the scalars in y, using
        // the algorithm defined above. Use a single buffer to avoid
        // frequent reallocations. Handle the first case to avoid a redundant
        // addition, since we know y.len() >= 1.
        let mut z: Vec<Limb> = small::mul(x, y[0]);
        z.resize(x.len() + y.len(), 0);

        // Handle the iterative cases.
        for (i, &yi) in y[1..].iter().enumerate() {
            let zi: Vec<Limb> = small::mul(x, yi);
            iadd_impl(&mut z, &zi, i + 1);
        }

        small::normalize(&mut z);

        z
    }
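
    // Illustrative example (not in the original source): interpreting limbs in
    // base `B = 2^Limb::BITS`, `long_mul(&[1, 2], &[3, 4])` computes
    // (1 + 2*B) * (3 + 4*B) = 3 + 10*B + 8*B^2, i.e. the limbs `[3, 10, 8]`.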

    /// Split a buffer at index `m` into (lo, hi) halves.
    #[inline]
    pub fn karatsuba_split(z: &[Limb], m: usize) -> (&[Limb], &[Limb]) {
        (&z[..m], &z[m..])
    }

    /// Karatsuba multiplication algorithm with roughly equal input sizes.
    ///
    /// Assumes `y.len() >= x.len()`.
    fn karatsuba_mul(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        if y.len() <= KARATSUBA_CUTOFF {
            // Bottom-out to long multiplication for small cases.
            long_mul(x, y)
        } else if x.len() < y.len() / 2 {
            karatsuba_uneven_mul(x, y)
        } else {
            // Do our 3 multiplications.
            let m = y.len() / 2;
            let (xl, xh) = karatsuba_split(x, m);
            let (yl, yh) = karatsuba_split(y, m);
            let sumx = add(xl, xh);
            let sumy = add(yl, yh);
            let z0 = karatsuba_mul(xl, yl);
            let mut z1 = karatsuba_mul(&sumx, &sumy);
            let z2 = karatsuba_mul(xh, yh);
            // Properly scale z1, which is `z1 - z2 - z0`.
            isub(&mut z1, &z2);
            isub(&mut z1, &z0);

            // Create our result, which is equal to, in little-endian order:
            // [z0, z1 - z2 - z0, z2]
            // z1 must be shifted m limbs (B^m, with B = 2^Limb::BITS) over.
            // z2 must be shifted 2*m limbs (B^(2m)) over.
            let len = z0.len().max(m + z1.len()).max(2 * m + z2.len());
            let mut result = z0;
            result.reserve_exact(len - result.len());
            iadd_impl(&mut result, &z1, m);
            iadd_impl(&mut result, &z2, 2 * m);

            result
        }
    }
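
    // Illustrative note (not in the original source): the recombination above
    // relies on the identity, with B = 2^Limb::BITS,
    //     x = xl + xh*B^m, y = yl + yh*B^m,
    //     x*y = z0 + (z1 - z0 - z2)*B^m + z2*B^(2m),
    // where z0 = xl*yl, z2 = xh*yh, and z1 = (xl + xh)*(yl + yh), so only
    // three recursive multiplications are needed instead of four.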

    /// Karatsuba multiplication algorithm where y is substantially larger than x.
    ///
    /// Assumes `y.len() >= x.len()`.
    fn karatsuba_uneven_mul(x: &[Limb], mut y: &[Limb]) -> Vec<Limb> {
        let mut result = Vec::<Limb>::default();
        result.resize(x.len() + y.len(), 0);

        // This effectively is like grade-school multiplication between
        // two numbers, except we're using splits on `y`, and the intermediate
        // step is a Karatsuba multiplication.
        let mut start = 0;
        while !y.is_empty() {
            let m = x.len().min(y.len());
            let (yl, yh) = karatsuba_split(y, m);
            let prod = karatsuba_mul(x, yl);
            iadd_impl(&mut result, &prod, start);
            y = yh;
            start += m;
        }
        small::normalize(&mut result);

        result
    }

    /// Forwarder to the proper Karatsuba algorithm.
    #[inline]
    fn karatsuba_mul_fwd(x: &[Limb], y: &[Limb]) -> Vec<Limb> {
        if x.len() < y.len() {
            karatsuba_mul(x, y)
        } else {
            karatsuba_mul(y, x)
        }
    }

    /// MulAssign bigint to bigint.
    #[inline]
    pub fn imul(x: &mut Vec<Limb>, y: &[Limb]) {
        if y.len() == 1 {
            small::imul(x, y[0]);
        } else {
            // We're not really in a condition where using Karatsuba
            // multiplication makes sense, so we're just going to use long
            // multiplication. ~20% speedup compared to:
            //      *x = karatsuba_mul_fwd(x, y);
            *x = long_mul(y, x);
        }
    }
} // large

// TRAITS
// ------

/// Trait for shared operations on big integers.
///
/// None of these are implemented using normal traits, since these
/// are very expensive operations, and we want to deliberately
/// and explicitly use these functions.
pub(crate) trait Math: Clone + Sized + Default {
    // DATA

    /// Get access to the underlying data.
    fn data(&self) -> &Vec<Limb>;

    /// Get mutable access to the underlying data.
    fn data_mut(&mut self) -> &mut Vec<Limb>;

    // RELATIVE OPERATIONS

    /// Compare self to y.
    #[inline]
    fn compare(&self, y: &Self) -> cmp::Ordering {
        large::compare(self.data(), y.data())
    }

    // PROPERTIES

    /// Get the high 64 bits from the bigint and whether there are remaining bits.
    #[inline]
    fn hi64(&self) -> (u64, bool) {
        self.data().as_slice().hi64()
    }

    /// Calculate the bit-length of the big-integer.
    /// Returns usize::max_value() if the value overflows,
    /// i.e., if `self.data().len() > usize::max_value() / 8`.
    #[inline]
    fn bit_length(&self) -> usize {
        small::bit_length(self.data())
    }

    // INTEGER CONVERSIONS

    /// Create a new big integer from a u64.
    #[inline]
    fn from_u64(x: u64) -> Self {
        let mut v = Self::default();
        let slc = split_u64(x);
        v.data_mut().extend_from_slice(&slc);
        v.normalize();
        v
    }
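
    // Illustrative example (not in the original source): with 32-bit limbs,
    // `from_u64(0x1_0000_0002)` produces the little-endian limbs `[2, 1]`;
    // with 64-bit limbs it produces the single limb `[0x1_0000_0002]`.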

    // NORMALIZE

    /// Normalize the integer, so any leading zero values are removed.
    #[inline]
    fn normalize(&mut self) {
        small::normalize(self.data_mut());
    }

    // ADDITION

    /// AddAssign small integer.
    #[inline]
    fn iadd_small(&mut self, y: Limb) {
        small::iadd(self.data_mut(), y);
    }

    // MULTIPLICATION

    /// MulAssign small integer.
    #[inline]
    fn imul_small(&mut self, y: Limb) {
        small::imul(self.data_mut(), y);
    }

    /// Multiply by a power of 2.
    #[inline]
    fn imul_pow2(&mut self, n: u32) {
        self.ishl(n as usize);
    }

    /// Multiply by a power of 5.
    #[inline]
    fn imul_pow5(&mut self, n: u32) {
        small::imul_pow5(self.data_mut(), n);
    }

    /// MulAssign by a power of 10, using the identity `10^n == 5^n * 2^n`.
    #[inline]
    fn imul_pow10(&mut self, n: u32) {
        self.imul_pow5(n);
        self.imul_pow2(n);
    }

    // SHIFTS

    /// Shift-left the entire buffer n bits.
    #[inline]
    fn ishl(&mut self, n: usize) {
        small::ishl(self.data_mut(), n);
    }
}