vsimd/
bswap.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
use crate::pod::POD;
use crate::vector::{V128, V256};
use crate::SIMD256;

/// Byte-index table for 16-bit groups: inside every 2-byte group the two
/// source indices are listed in reverse order (`1, 0`, `3, 2`, ...).
///
/// NOTE(review): presumably consumed as a byte-shuffle mask that byte-swaps
/// eight `u16` lanes of a 128-bit vector — confirm at the use sites.
pub(crate) const SHUFFLE_U16X8: V128 = V128::from_bytes([
    0x01, 0x00, 0x03, 0x02, 0x05, 0x04, 0x07, 0x06, //
    0x09, 0x08, 0x0b, 0x0a, 0x0d, 0x0c, 0x0f, 0x0e, //
]);

/// Byte-index table for 32-bit groups: inside every 4-byte group the four
/// source indices are listed in reverse order (`3, 2, 1, 0`, `7, 6, 5, 4`, ...).
///
/// NOTE(review): presumably consumed as a byte-shuffle mask that byte-swaps
/// four `u32` lanes of a 128-bit vector — confirm at the use sites.
pub(crate) const SHUFFLE_U32X4: V128 = V128::from_bytes([
    0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04, //
    0x0b, 0x0a, 0x09, 0x08, 0x0f, 0x0e, 0x0d, 0x0c, //
]);

/// Byte-index table for 64-bit groups: inside every 8-byte group the eight
/// source indices are listed in reverse order (`7..=0`, then `15..=8`).
///
/// NOTE(review): presumably consumed as a byte-shuffle mask that byte-swaps
/// two `u64` lanes of a 128-bit vector — confirm at the use sites.
pub(crate) const SHUFFLE_U64X2: V128 = V128::from_bytes([
    0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, //
    0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, //
]);

/// 256-bit table derived from [`SHUFFLE_U16X8`] through `V128::x2`.
///
/// NOTE(review): `x2` presumably repeats the 128-bit table in both halves of
/// the `V256` — confirm against its definition.
pub(crate) const SHUFFLE_U16X16: V256 = SHUFFLE_U16X8.x2();

/// 256-bit table derived from [`SHUFFLE_U32X4`] through `V128::x2`
/// (same caveat about `x2` as for [`SHUFFLE_U16X16`]'s source table — TODO confirm).
pub(crate) const SHUFFLE_U32X8: V256 = SHUFFLE_U32X4.x2();

/// 256-bit table derived from [`SHUFFLE_U64X2`] through `V128::x2`
/// (`x2` is presumed to duplicate the 128-bit table — TODO confirm).
pub(crate) const SHUFFLE_U64X4: V256 = SHUFFLE_U64X2.x2();

/// Element types that can have their bytes reversed, either one value at a
/// time or a whole [`V256`] at a time.
///
/// # Safety
///
/// NOTE(review): the contract implementors must uphold is not written down in
/// this file. `bswap_simd` advances its raw pointers by `LANES` elements per
/// 256-bit load/store, so `LANES` presumably has to equal the number of `Self`
/// values held by one `V256` — confirm and document the intended contract.
pub unsafe trait BSwap: POD {
    /// Number of elements stepped over per vector iteration in `bswap_simd`.
    const LANES: usize;

    /// Byte-swaps a single value.
    fn swap_single(x: Self) -> Self;

    /// Byte-swaps a whole vector of values using the SIMD backend `s`.
    fn swap_simd<S>(s: S, a: V256) -> V256
    where
        S: SIMD256;
}

/// Byte-swap support for `u16` elements.
//
// NOTE(review): the safety contract of `BSwap` is not visible in this file;
// confirm this impl satisfies it.
unsafe impl BSwap for u16 {
    /// 32 bytes (the width suggested by `V256`) divided by the 2-byte element
    /// size, i.e. 16.
    const LANES: usize = 32 / core::mem::size_of::<Self>();

    /// Reverses the two bytes of `x`.
    #[inline(always)]
    fn swap_single(x: Self) -> Self {
        u16::swap_bytes(x)
    }

    /// Delegates to the backend's `u16x16_bswap`.
    #[inline(always)]
    fn swap_simd<S: SIMD256>(s: S, a: V256) -> V256 {
        s.u16x16_bswap(a)
    }
}

/// Byte-swap support for `u32` elements.
//
// NOTE(review): the safety contract of `BSwap` is not visible in this file;
// confirm this impl satisfies it.
unsafe impl BSwap for u32 {
    /// 32 bytes (the width suggested by `V256`) divided by the 4-byte element
    /// size, i.e. 8.
    const LANES: usize = 32 / core::mem::size_of::<Self>();

    /// Reverses the four bytes of `x`.
    #[inline(always)]
    fn swap_single(x: Self) -> Self {
        u32::swap_bytes(x)
    }

    /// Delegates to the backend's `u32x8_bswap`.
    #[inline(always)]
    fn swap_simd<S: SIMD256>(s: S, a: V256) -> V256 {
        s.u32x8_bswap(a)
    }
}

/// Byte-swap support for `u64` elements.
//
// NOTE(review): the safety contract of `BSwap` is not visible in this file;
// confirm this impl satisfies it.
unsafe impl BSwap for u64 {
    /// 32 bytes (the width suggested by `V256`) divided by the 8-byte element
    /// size, i.e. 4.
    const LANES: usize = 32 / core::mem::size_of::<Self>();

    /// Reverses the eight bytes of `x`.
    #[inline(always)]
    fn swap_single(x: Self) -> Self {
        u64::swap_bytes(x)
    }

    /// Delegates to the backend's `u64x4_bswap`.
    #[inline(always)]
    fn swap_simd<S: SIMD256>(s: S, a: V256) -> V256 {
        s.u64x4_bswap(a)
    }
}

#[inline(always)]
pub unsafe fn bswap_fallback<T>(mut src: *const T, len: usize, mut dst: *mut T)
where
    T: BSwap,
{
    let end = src.add(len);
    while src < end {
        let x = src.read();
        let y = <T as BSwap>::swap_single(x);
        dst.write(y);
        src = src.add(1);
        dst = dst.add(1);
    }
}

/// Byte-swaps `len` elements from `src` into `dst`, a whole vector at a time
/// where possible.
///
/// While at least `T::LANES` elements remain, one `V256` is loaded from `src`,
/// passed through [`BSwap::swap_simd`] and stored to `dst`. The remaining
/// `len % T::LANES` elements are handed to [`bswap_fallback`].
///
/// # Safety
///
/// The caller must guarantee that `src` is valid for reading `len` elements
/// and `dst` is valid for writing `len` elements. The tail goes through
/// `bswap_fallback`, which uses plain `read()`/`write()`, so both pointers
/// must also be properly aligned for `T`.
// NOTE(review): the vector loop relies on `T::LANES` elements being exactly
// what one `v256_load_unaligned`/`v256_store_unaligned` touches — confirm
// against the `SIMD256` definition.
#[inline(always)]
pub unsafe fn bswap_simd<S: SIMD256, T>(s: S, mut src: *const T, mut len: usize, mut dst: *mut T)
where
    T: BSwap,
{
    // Consume full vectors for as long as enough elements are left.
    while len >= T::LANES {
        let loaded = s.v256_load_unaligned(src.cast());
        let swapped = T::swap_simd(s, loaded);
        s.v256_store_unaligned(dst.cast(), swapped);

        src = src.add(T::LANES);
        dst = dst.add(T::LANES);
        len -= T::LANES;
    }

    // Fewer than `T::LANES` elements remain; finish them one by one.
    bswap_fallback(src, len, dst);
}