1use crate::pod::POD;
2use crate::vector::{V128, V256};
3use crate::SIMD256;
4
5pub(crate) const SHUFFLE_U16X8: V128 = V128::from_bytes([
6 0x01, 0x00, 0x03, 0x02, 0x05, 0x04, 0x07, 0x06, 0x09, 0x08, 0x0b, 0x0a, 0x0d, 0x0c, 0x0f, 0x0e, ]);
9
10pub(crate) const SHUFFLE_U32X4: V128 = V128::from_bytes([
11 0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04, 0x0b, 0x0a, 0x09, 0x08, 0x0f, 0x0e, 0x0d, 0x0c, ]);
14
15pub(crate) const SHUFFLE_U64X2: V128 = V128::from_bytes([
16 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, ]);
19
20pub(crate) const SHUFFLE_U16X16: V256 = SHUFFLE_U16X8.x2();
21
22pub(crate) const SHUFFLE_U32X8: V256 = SHUFFLE_U32X4.x2();
23
24pub(crate) const SHUFFLE_U64X4: V256 = SHUFFLE_U64X2.x2();
25
26pub unsafe trait BSwap: POD {
27 const LANES: usize;
28 fn swap_single(x: Self) -> Self;
29 fn swap_simd<S: SIMD256>(s: S, a: V256) -> V256;
30}
31
32unsafe impl BSwap for u16 {
33 const LANES: usize = 16;
34
35 #[inline(always)]
36 fn swap_single(x: Self) -> Self {
37 x.swap_bytes()
38 }
39
40 #[inline(always)]
41 fn swap_simd<S: SIMD256>(s: S, a: V256) -> V256 {
42 s.u16x16_bswap(a)
43 }
44}
45
46unsafe impl BSwap for u32 {
47 const LANES: usize = 8;
48
49 #[inline(always)]
50 fn swap_single(x: Self) -> Self {
51 x.swap_bytes()
52 }
53
54 #[inline(always)]
55 fn swap_simd<S: SIMD256>(s: S, a: V256) -> V256 {
56 s.u32x8_bswap(a)
57 }
58}
59
60unsafe impl BSwap for u64 {
61 const LANES: usize = 4;
62
63 #[inline(always)]
64 fn swap_single(x: Self) -> Self {
65 x.swap_bytes()
66 }
67
68 #[inline(always)]
69 fn swap_simd<S: SIMD256>(s: S, a: V256) -> V256 {
70 s.u64x4_bswap(a)
71 }
72}
73
74#[inline(always)]
75pub unsafe fn bswap_fallback<T>(mut src: *const T, len: usize, mut dst: *mut T)
76where
77 T: BSwap,
78{
79 let end = src.add(len);
80 while src < end {
81 let x = src.read();
82 let y = <T as BSwap>::swap_single(x);
83 dst.write(y);
84 src = src.add(1);
85 dst = dst.add(1);
86 }
87}
88
89#[inline(always)]
90pub unsafe fn bswap_simd<S: SIMD256, T>(s: S, mut src: *const T, mut len: usize, mut dst: *mut T)
91where
92 T: BSwap,
93{
94 let end = src.add(len / T::LANES * T::LANES);
95 while src < end {
96 let x = s.v256_load_unaligned(src.cast());
97 let y = <T as BSwap>::swap_simd(s, x);
98 s.v256_store_unaligned(dst.cast(), y);
99 src = src.add(T::LANES);
100 dst = dst.add(T::LANES);
101 }
102 len %= T::LANES;
103
104 bswap_fallback(src, len, dst);
105}