// vsimd/simulation.rs
use crate::vector::V128;

use core::mem::transmute as t;

#[cfg(miri)]
use core::cmp::{max, min};

// TODO: waiting for MIRI's support

/// Scalar fallback (compiled only under Miri): byte-wise maximum of `a` and `b`.
///
/// Each of the 16 output bytes is the larger of the two input bytes at the
/// same position, compared as unsigned `u8` values.
#[cfg(miri)]
#[inline(always)]
pub fn u8x16_max(a: V128, b: V128) -> V128 {
    let (lhs, rhs) = (a.as_bytes(), b.as_bytes());
    let mut out = [0; 16];
    // Walk the three 16-byte arrays in lockstep instead of indexing.
    for ((dst, &x), &y) in out.iter_mut().zip(lhs.iter()).zip(rhs.iter()) {
        *dst = max(x, y);
    }
    V128::from_bytes(out)
}

/// Scalar fallback (compiled only under Miri): byte-wise minimum of `a` and `b`.
///
/// Each of the 16 output bytes is the smaller of the two input bytes at the
/// same position, compared as unsigned `u8` values.
#[cfg(miri)]
#[inline(always)]
pub fn u8x16_min(a: V128, b: V128) -> V128 {
    let (lhs, rhs) = (a.as_bytes(), b.as_bytes());
    let mut out = [0; 16];
    // Walk the three 16-byte arrays in lockstep instead of indexing.
    for ((dst, &x), &y) in out.iter_mut().zip(lhs.iter()).zip(rhs.iter()) {
        *dst = min(x, y);
    }
    V128::from_bytes(out)
}

/// Gathers the most significant bit of every byte of `a` into a 16-bit mask.
///
/// Bit `i` of the result is the top bit (bit 7) of byte `i` of `a`.
#[inline(always)]
pub fn u8x16_bitmask(a: V128) -> u16 {
    // FIXME: is it defined behavior?
    // https://github.com/rust-lang/miri/issues/2617
    // https://github.com/rust-lang/stdarch/issues/1347

    let bytes = a.as_bytes();
    bytes
        .iter()
        .enumerate()
        // `byte >> 7` is 0 or 1; place it at the bit position of its lane.
        .fold(0_u16, |mask, (lane, &byte)| mask | (u16::from(byte >> 7) << lane))
}

/// Shifts each of the eight `u16` lanes of `a` right by `imm8` bits (logical shift).
///
/// NOTE(review): a shift count of 16 or more overflows the `u16` shift, which
/// panics when overflow checks are enabled — callers presumably pass `imm8 < 16`;
/// confirm.
#[inline(always)]
pub fn u16x8_shr(a: V128, imm8: u8) -> V128 {
    // SAFETY: `transmute` only compiles when both types have the same size, and
    // every bit pattern is a valid `[u16; 8]`.
    let mut lanes: [u16; 8] = unsafe { t(a) };
    lanes.iter_mut().for_each(|lane| *lane >>= imm8);
    // SAFETY: sizes match as above; assumes `V128` accepts any 128-bit
    // pattern — TODO confirm against its definition.
    unsafe { t(lanes) }
}

/// Shifts each of the eight `u16` lanes of `a` left by `imm8` bits.
///
/// NOTE(review): a shift count of 16 or more overflows the `u16` shift, which
/// panics when overflow checks are enabled — callers presumably pass `imm8 < 16`;
/// confirm.
#[inline(always)]
pub fn u16x8_shl(a: V128, imm8: u8) -> V128 {
    // SAFETY: `transmute` only compiles when both types have the same size, and
    // every bit pattern is a valid `[u16; 8]`.
    let mut lanes: [u16; 8] = unsafe { t(a) };
    lanes.iter_mut().for_each(|lane| *lane <<= imm8);
    // SAFETY: sizes match as above; assumes `V128` accepts any 128-bit
    // pattern — TODO confirm against its definition.
    unsafe { t(lanes) }
}

/// Packs two vectors of eight `i16` lanes into sixteen bytes with unsigned
/// saturation.
///
/// Every lane is clamped to `0..=255` and narrowed to `u8`. The lanes of `a`
/// fill output bytes `0..8`, the lanes of `b` fill output bytes `8..16`.
#[inline(always)]
pub fn i16x8_packus(a: V128, b: V128) -> V128 {
    /// Clamps a signed 16-bit value into the `u8` range.
    #[inline(always)]
    fn saturate(x: i16) -> u8 {
        match x {
            0..=255 => x as u8,
            _ if x < 0 => 0,
            _ => 255,
        }
    }

    // SAFETY: `transmute` only compiles when both types have the same size, and
    // every bit pattern is a valid `[i16; 8]`.
    let first: [i16; 8] = unsafe { t(a) };
    // SAFETY: same as above.
    let second: [i16; 8] = unsafe { t(b) };

    let mut packed: [u8; 16] = [0; 16];
    let (front, back) = packed.split_at_mut(8);
    for (dst, &src) in front.iter_mut().zip(first.iter()) {
        *dst = saturate(src);
    }
    for (dst, &src) in back.iter_mut().zip(second.iter()) {
        *dst = saturate(src);
    }
    V128::from_bytes(packed)
}