lz4_flex/
fastcpy.rs

1//! # FastCpy
2//!
3//! The Rust Compiler calls `memcpy` for slices of unknown length.
4//! This crate provides a faster implementation of `memcpy` for slices up to 32bytes (64bytes with `avx`).
5//! If you know most of you copy operations are not too big you can use `fastcpy` to speed up your program.
6//!
7//! `fastcpy` is designed to contain not too much assembly, so the overhead is low.
8//!
9//! As fall back the standard `memcpy` is called
10//!
11//! ## Double Copy Trick
12//! `fastcpy` employs a double copy trick to copy slices of length 4-32bytes (64bytes with `avx`).
13//! E.g. Slice of length 6 can be copied with two uncoditional copy operations.
14//!
15//! /// [1, 2, 3, 4, 5, 6]
16//! /// [1, 2, 3, 4]
17//! ///       [3, 4, 5, 6]
18//!
19
20#[inline]
21pub fn slice_copy(src: &[u8], dst: &mut [u8]) {
22    #[inline(never)]
23    #[cold]
24    #[track_caller]
25    fn len_mismatch_fail(dst_len: usize, src_len: usize) -> ! {
26        panic!(
27            "source slice length ({}) does not match destination slice length ({})",
28            src_len, dst_len,
29        );
30    }
31
32    if src.len() != dst.len() {
33        len_mismatch_fail(src.len(), dst.len());
34    }
35    let len = src.len();
36
37    if src.is_empty() {
38        return;
39    }
40
41    if len < 4 {
42        short_copy(src, dst);
43        return;
44    }
45
46    if len < 8 {
47        double_copy_trick::<4>(src, dst);
48        return;
49    }
50
51    if len <= 16 {
52        double_copy_trick::<8>(src, dst);
53        return;
54    }
55
56    if len <= 32 {
57        double_copy_trick::<16>(src, dst);
58        return;
59    }
60
61    /// The code will use the vmovdqu instruction to copy 32 bytes at a time.
62    #[cfg(target_feature = "avx")]
63    {
64        if len <= 64 {
65            double_copy_trick::<32>(src, dst);
66            return;
67        }
68    }
69
70    // For larger sizes we use the default, which calls memcpy
71    // memcpy does some virtual memory tricks to copy large chunks of memory.
72    //
73    // The theory should be that the checks above don't cost much relative to the copy call for
74    // larger copies.
75    // The bounds checks in `copy_from_slice` are elided.
76    dst.copy_from_slice(src);
77}
78
79#[inline(always)]
80fn short_copy(src: &[u8], dst: &mut [u8]) {
81    let len = src.len();
82
83    // length 1-3
84    dst[0] = src[0];
85    if len >= 2 {
86        double_copy_trick::<2>(src, dst);
87    }
88}
89
90#[inline(always)]
91/// [1, 2, 3, 4, 5, 6]
92/// [1, 2, 3, 4]
93///       [3, 4, 5, 6]
94fn double_copy_trick<const SIZE: usize>(src: &[u8], dst: &mut [u8]) {
95    dst[0..SIZE].copy_from_slice(&src[0..SIZE]);
96    dst[src.len() - SIZE..].copy_from_slice(&src[src.len() - SIZE..]);
97}
98
99#[cfg(test)]
100mod tests {
101    use super::slice_copy;
102    use proptest::prelude::*;
103    proptest! {
104        #[test]
105        fn test_fast_short_slice_copy(left: Vec<u8>) {
106            let mut right = vec![0u8; left.len()];
107            slice_copy(&left, &mut right);
108            prop_assert_eq!(&left, &right);
109        }
110    }
111
112    #[test]
113    fn test_fast_short_slice_copy_edge_cases() {
114        for len in 0..(512 * 2) {
115            let left = (0..len).map(|i| i as u8).collect::<Vec<_>>();
116            let mut right = vec![0u8; len];
117            slice_copy(&left, &mut right);
118            assert_eq!(left, right);
119        }
120    }
121
122    #[test]
123    fn test_fail2() {
124        let left = vec![
125            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
126            24, 25, 26, 27, 28, 29, 30, 31, 32,
127        ];
128        let mut right = vec![0u8; left.len()];
129        slice_copy(&left, &mut right);
130        assert_eq!(left, right);
131    }
132
133    #[test]
134    fn test_fail() {
135        let left = vec![
136            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
137            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
138            0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
139        ];
140        let mut right = vec![0u8; left.len()];
141        slice_copy(&left, &mut right);
142        assert_eq!(left, right);
143    }
144}