// vsimd/bswap.rs

1use crate::pod::POD;
2use crate::vector::{V128, V256};
3use crate::SIMD256;
4
5pub(crate) const SHUFFLE_U16X8: V128 = V128::from_bytes([
6    0x01, 0x00, 0x03, 0x02, 0x05, 0x04, 0x07, 0x06, //
7    0x09, 0x08, 0x0b, 0x0a, 0x0d, 0x0c, 0x0f, 0x0e, //
8]);
9
10pub(crate) const SHUFFLE_U32X4: V128 = V128::from_bytes([
11    0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04, //
12    0x0b, 0x0a, 0x09, 0x08, 0x0f, 0x0e, 0x0d, 0x0c, //
13]);
14
15pub(crate) const SHUFFLE_U64X2: V128 = V128::from_bytes([
16    0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, //
17    0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, //
18]);
19
20pub(crate) const SHUFFLE_U16X16: V256 = SHUFFLE_U16X8.x2();
21
22pub(crate) const SHUFFLE_U32X8: V256 = SHUFFLE_U32X4.x2();
23
24pub(crate) const SHUFFLE_U64X4: V256 = SHUFFLE_U64X2.x2();
25
26pub unsafe trait BSwap: POD {
27    const LANES: usize;
28    fn swap_single(x: Self) -> Self;
29    fn swap_simd<S: SIMD256>(s: S, a: V256) -> V256;
30}
31
32unsafe impl BSwap for u16 {
33    const LANES: usize = 16;
34
35    #[inline(always)]
36    fn swap_single(x: Self) -> Self {
37        x.swap_bytes()
38    }
39
40    #[inline(always)]
41    fn swap_simd<S: SIMD256>(s: S, a: V256) -> V256 {
42        s.u16x16_bswap(a)
43    }
44}
45
46unsafe impl BSwap for u32 {
47    const LANES: usize = 8;
48
49    #[inline(always)]
50    fn swap_single(x: Self) -> Self {
51        x.swap_bytes()
52    }
53
54    #[inline(always)]
55    fn swap_simd<S: SIMD256>(s: S, a: V256) -> V256 {
56        s.u32x8_bswap(a)
57    }
58}
59
60unsafe impl BSwap for u64 {
61    const LANES: usize = 4;
62
63    #[inline(always)]
64    fn swap_single(x: Self) -> Self {
65        x.swap_bytes()
66    }
67
68    #[inline(always)]
69    fn swap_simd<S: SIMD256>(s: S, a: V256) -> V256 {
70        s.u64x4_bswap(a)
71    }
72}
73
74#[inline(always)]
75pub unsafe fn bswap_fallback<T>(mut src: *const T, len: usize, mut dst: *mut T)
76where
77    T: BSwap,
78{
79    let end = src.add(len);
80    while src < end {
81        let x = src.read();
82        let y = <T as BSwap>::swap_single(x);
83        dst.write(y);
84        src = src.add(1);
85        dst = dst.add(1);
86    }
87}
88
89#[inline(always)]
90pub unsafe fn bswap_simd<S: SIMD256, T>(s: S, mut src: *const T, mut len: usize, mut dst: *mut T)
91where
92    T: BSwap,
93{
94    let end = src.add(len / T::LANES * T::LANES);
95    while src < end {
96        let x = s.v256_load_unaligned(src.cast());
97        let y = <T as BSwap>::swap_simd(s, x);
98        s.v256_store_unaligned(dst.cast(), y);
99        src = src.add(T::LANES);
100        dst = dst.add(T::LANES);
101    }
102    len %= T::LANES;
103
104    bswap_fallback(src, len, dst);
105}