use crate::isa::{NEON, SSE2, SSE41, WASM128};
use crate::unified;
use crate::vector::V128;
use crate::SIMD64;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use crate::isa::SSSE3;
#[cfg(any(
any(target_arch = "x86", target_arch = "x86_64"),
any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"),
target_arch = "wasm32"
))]
use core::mem::transmute as t;
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
#[cfg(all(feature = "unstable", target_arch = "arm"))]
use core::arch::arm::*;
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::*;
#[cfg(target_arch = "wasm32")]
use core::arch::wasm32::*;
pub unsafe trait SIMD128: SIMD64 {
#[inline(always)]
unsafe fn v128_load(self, addr: *const u8) -> V128 {
debug_assert_ptr_align!(addr, 16);
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return t(_mm_load_si128(addr.cast()));
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return self.v128_load_unaligned(addr);
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
return self.v128_load_unaligned(addr);
}
{
let _ = addr;
unreachable!()
}
}
#[inline(always)]
unsafe fn v128_load_unaligned(self, addr: *const u8) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return t(_mm_loadu_si128(addr.cast()));
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return t(vld1q_u8(addr.cast()));
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
return t(v128_load(addr.cast()));
}
{
let _ = addr;
unreachable!()
}
}
#[inline(always)]
unsafe fn v128_store(self, addr: *mut u8, a: V128) {
debug_assert_ptr_align!(addr, 16);
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return _mm_store_si128(addr.cast(), t(a));
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return self.v128_store_unaligned(addr, a);
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
return self.v128_store_unaligned(addr, a);
}
{
let _ = (addr, a);
unreachable!()
}
}
#[inline(always)]
unsafe fn v128_store_unaligned(self, addr: *mut u8, a: V128) {
if cfg!(miri) {
return addr.cast::<V128>().write_unaligned(a);
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return _mm_storeu_si128(addr.cast(), t(a));
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return vst1q_u8(addr.cast(), t(a));
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
return v128_store(addr.cast(), t(a));
}
{
let _ = (addr, a);
unreachable!()
}
}
#[inline(always)]
fn v128_create_zero(self) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_setzero_si128()) };
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vdupq_n_u8(0)) };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
return unsafe { t(u8x16_splat(0)) };
}
{
unreachable!()
}
}
#[inline(always)]
fn v128_not(self, a: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe {
let a = t(a);
t(_mm_xor_si128(a, _mm_cmpeq_epi8(a, a)))
};
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vmvnq_u8(t(a))) };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
return unsafe { t(v128_not(t(a))) };
}
{
let _ = a;
unreachable!()
}
}
#[inline(always)]
fn v128_and(self, a: V128, b: V128) -> V128 {
unified::and(self, a, b)
}
#[inline(always)]
fn v128_or(self, a: V128, b: V128) -> V128 {
unified::or(self, a, b)
}
#[inline(always)]
fn v128_xor(self, a: V128, b: V128) -> V128 {
unified::xor(self, a, b)
}
#[inline(always)]
fn v128_andnot(self, a: V128, b: V128) -> V128 {
unified::andnot(self, a, b)
}
#[inline(always)]
fn v128_all_zero(self, a: V128) -> bool {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE41) {
return unsafe {
let a = t(a);
_mm_testz_si128(a, a) != 0
};
}
#[cfg(all(feature = "unstable", target_arch = "arm"))]
if matches_isa!(Self, NEON) {
return unsafe {
let a1: uint32x2x2_t = t(a);
let a2: uint32x2_t = vorr_u32(a1.0, a1.1);
vget_lane_u32::<0>(vpmax_u32(a2, a2)) == 0
};
}
#[cfg(target_arch = "aarch64")]
if matches_isa!(Self, NEON) {
return unsafe { vmaxvq_u8(t(a)) == 0 };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
return unsafe { !v128_any_true(t(a)) };
}
{
let _ = a;
unreachable!()
}
}
#[inline(always)]
fn u8x16_splat(self, x: u8) -> V128 {
unified::splat(self, x)
}
#[inline(always)]
fn u16x8_splat(self, x: u16) -> V128 {
unified::splat(self, x)
}
#[inline(always)]
fn u32x4_splat(self, x: u32) -> V128 {
unified::splat(self, x)
}
#[inline(always)]
fn u64x2_splat(self, x: u64) -> V128 {
unified::splat(self, x)
}
#[inline(always)]
fn i8x16_splat(self, x: i8) -> V128 {
unified::splat(self, x)
}
#[inline(always)]
fn i16x8_splat(self, x: i16) -> V128 {
unified::splat(self, x)
}
#[inline(always)]
fn i32x4_splat(self, x: i32) -> V128 {
unified::splat(self, x)
}
#[inline(always)]
fn i64x2_splat(self, x: i64) -> V128 {
unified::splat(self, x)
}
#[inline(always)]
fn u8x16_add(self, a: V128, b: V128) -> V128 {
unified::add::<_, u8, _>(self, a, b)
}
#[inline(always)]
fn u16x8_add(self, a: V128, b: V128) -> V128 {
unified::add::<_, u16, _>(self, a, b)
}
#[inline(always)]
fn u32x4_add(self, a: V128, b: V128) -> V128 {
unified::add::<_, u32, _>(self, a, b)
}
#[inline(always)]
fn u64x2_add(self, a: V128, b: V128) -> V128 {
unified::add::<_, u64, _>(self, a, b)
}
#[inline(always)]
fn u8x16_sub(self, a: V128, b: V128) -> V128 {
unified::sub::<_, u8, _>(self, a, b)
}
#[inline(always)]
fn u16x8_sub(self, a: V128, b: V128) -> V128 {
unified::sub::<_, u16, _>(self, a, b)
}
#[inline(always)]
fn u32x4_sub(self, a: V128, b: V128) -> V128 {
unified::sub::<_, u32, _>(self, a, b)
}
#[inline(always)]
fn u64x2_sub(self, a: V128, b: V128) -> V128 {
unified::sub::<_, u64, _>(self, a, b)
}
#[inline(always)]
fn u8x16_sub_sat(self, a: V128, b: V128) -> V128 {
unified::sub_sat::<_, u8, _>(self, a, b)
}
#[inline(always)]
fn u16x8_sub_sat(self, a: V128, b: V128) -> V128 {
unified::sub_sat::<_, u16, _>(self, a, b)
}
#[inline(always)]
fn i8x16_sub_sat(self, a: V128, b: V128) -> V128 {
unified::sub_sat::<_, i8, _>(self, a, b)
}
#[inline(always)]
fn i16x8_sub_sat(self, a: V128, b: V128) -> V128 {
unified::sub_sat::<_, i16, _>(self, a, b)
}
#[inline(always)]
fn i16x8_mul_lo(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_mullo_epi16(t(a), t(b))) };
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vmulq_s16(t(a), t(b))) };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
return unsafe { t(i16x8_mul(t(a), t(b))) };
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn i32x4_mul_lo(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE41) {
return unsafe { t(_mm_mullo_epi32(t(a), t(b))) };
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vmulq_s32(t(a), t(b))) };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
return unsafe { t(i32x4_mul(t(a), t(b))) };
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn u16x8_shl<const IMM8: i32>(self, a: V128) -> V128 {
if cfg!(miri) {
return crate::simulation::u16x8_shl(a, IMM8 as u8);
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_slli_epi16::<IMM8>(t(a))) };
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vshlq_n_u16::<IMM8>(t(a))) };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
return unsafe { t(u16x8_shl(t(a), IMM8 as u32)) };
}
{
let _ = a;
unreachable!()
}
}
#[inline(always)]
fn u32x4_shl<const IMM8: i32>(self, a: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_slli_epi32::<IMM8>(t(a))) };
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vshlq_n_u32::<IMM8>(t(a))) };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
return unsafe { t(u32x4_shl(t(a), IMM8 as u32)) };
}
{
let _ = a;
unreachable!()
}
}
#[inline(always)]
fn u16x8_shr<const IMM8: i32>(self, a: V128) -> V128 {
if cfg!(miri) {
return crate::simulation::u16x8_shr(a, IMM8 as u8);
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_srli_epi16::<IMM8>(t(a))) };
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vshrq_n_u16::<IMM8>(t(a))) };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
return unsafe { t(u16x8_shr(t(a), IMM8 as u32)) };
}
{
let _ = a;
unreachable!()
}
}
#[inline(always)]
fn u32x4_shr<const IMM8: i32>(self, a: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_srli_epi32::<IMM8>(t(a))) };
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vshrq_n_u32::<IMM8>(t(a))) };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
return unsafe { t(u32x4_shr(t(a), IMM8 as u32)) };
}
{
let _ = a;
unreachable!()
}
}
#[inline(always)]
fn u8x16_eq(self, a: V128, b: V128) -> V128 {
unified::eq::<_, u8, _>(self, a, b)
}
#[inline(always)]
fn u16x8_eq(self, a: V128, b: V128) -> V128 {
unified::eq::<_, u16, _>(self, a, b)
}
#[inline(always)]
fn u32x4_eq(self, a: V128, b: V128) -> V128 {
unified::eq::<_, u32, _>(self, a, b)
}
#[inline(always)]
fn u8x16_lt(self, a: V128, b: V128) -> V128 {
unified::lt::<_, u8, _>(self, a, b)
}
#[inline(always)]
fn u16x8_lt(self, a: V128, b: V128) -> V128 {
unified::lt::<_, u16, _>(self, a, b)
}
#[inline(always)]
fn u32x4_lt(self, a: V128, b: V128) -> V128 {
unified::lt::<_, u32, _>(self, a, b)
}
#[inline(always)]
fn i8x16_lt(self, a: V128, b: V128) -> V128 {
unified::lt::<_, i8, _>(self, a, b)
}
#[inline(always)]
fn i16x8_lt(self, a: V128, b: V128) -> V128 {
unified::lt::<_, i16, _>(self, a, b)
}
#[inline(always)]
fn i32x4_lt(self, a: V128, b: V128) -> V128 {
unified::lt::<_, i32, _>(self, a, b)
}
#[inline(always)]
fn u8x16_max(self, a: V128, b: V128) -> V128 {
unified::max::<_, u8, _>(self, a, b)
}
#[inline(always)]
fn u16x8_max(self, a: V128, b: V128) -> V128 {
unified::max::<_, u16, _>(self, a, b)
}
#[inline(always)]
fn u32x4_max(self, a: V128, b: V128) -> V128 {
unified::max::<_, u32, _>(self, a, b)
}
#[inline(always)]
fn i8x16_max(self, a: V128, b: V128) -> V128 {
unified::max::<_, i8, _>(self, a, b)
}
#[inline(always)]
fn i16x8_max(self, a: V128, b: V128) -> V128 {
unified::max::<_, i16, _>(self, a, b)
}
#[inline(always)]
fn i32x4_max(self, a: V128, b: V128) -> V128 {
unified::max::<_, i32, _>(self, a, b)
}
#[inline(always)]
fn u8x16_min(self, a: V128, b: V128) -> V128 {
unified::min::<_, u8, _>(self, a, b)
}
#[inline(always)]
fn u16x8_min(self, a: V128, b: V128) -> V128 {
unified::min::<_, u16, _>(self, a, b)
}
#[inline(always)]
fn u32x4_min(self, a: V128, b: V128) -> V128 {
unified::min::<_, u32, _>(self, a, b)
}
#[inline(always)]
fn i8x16_min(self, a: V128, b: V128) -> V128 {
unified::min::<_, i8, _>(self, a, b)
}
#[inline(always)]
fn i16x8_min(self, a: V128, b: V128) -> V128 {
unified::min::<_, i16, _>(self, a, b)
}
#[inline(always)]
fn i32x4_min(self, a: V128, b: V128) -> V128 {
unified::min::<_, i32, _>(self, a, b)
}
#[inline(always)]
fn u8x16_swizzle(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSSE3) {
return unsafe { t(_mm_shuffle_epi8(t(a), t(b))) };
}
#[cfg(all(feature = "unstable", target_arch = "arm"))]
if matches_isa!(Self, NEON) {
return unsafe {
let (a, b) = (t(a), t(b));
let a = uint8x8x2_t(vget_low_u8(a), vget_high_u8(a));
let b = (vget_low_u8(b), vget_high_u8(b));
let c = (vtbl2_u8(a, b.0), vtbl2_u8(a, b.1));
t([c.0, c.1])
};
}
#[cfg(target_arch = "aarch64")]
if matches_isa!(Self, NEON) {
return unsafe { t(vqtbl1q_u8(t(a), t(b))) };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
return unsafe { t(u8x16_swizzle(t(a), t(b))) };
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn u16x8_bswap(self, a: V128) -> V128 {
if matches_isa!(Self, SSE41 | WASM128) {
return self.u8x16_swizzle(a, crate::bswap::SHUFFLE_U16X8);
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vrev16q_u8(t(a))) };
}
{
let _ = a;
unreachable!()
}
}
#[inline(always)]
fn u32x4_bswap(self, a: V128) -> V128 {
if matches_isa!(Self, SSE41 | WASM128) {
return self.u8x16_swizzle(a, crate::bswap::SHUFFLE_U32X4);
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vrev32q_u8(t(a))) };
}
{
let _ = a;
unreachable!()
}
}
#[inline(always)]
fn u64x2_bswap(self, a: V128) -> V128 {
if matches_isa!(Self, SSE41 | WASM128) {
return self.u8x16_swizzle(a, crate::bswap::SHUFFLE_U64X2);
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vrev64q_u8(t(a))) };
}
{
let _ = a;
unreachable!()
}
}
#[inline(always)]
fn u8x16_any_zero(self, a: V128) -> bool {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
let is_zero = self.u8x16_eq(a, self.v128_create_zero());
return self.u8x16_bitmask(is_zero) != 0;
}
#[cfg(all(feature = "unstable", target_arch = "arm"))]
if matches_isa!(Self, NEON) {
return unsafe {
let a: uint8x8x2_t = t(a);
let a = vpmin_u8(a.0, a.1);
let m: u64 = t(vtst_u8(a, a));
m != u64::MAX
};
}
#[cfg(target_arch = "aarch64")]
if matches_isa!(Self, NEON) {
return unsafe { vminvq_u8(t(a)) == 0 };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
return unsafe { !u8x16_all_true(t(a)) };
}
{
let _ = a;
unreachable!()
}
}
#[inline(always)]
fn u8x16_bitmask(self, a: V128) -> u16 {
if cfg!(miri) {
return crate::simulation::u8x16_bitmask(a);
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { _mm_movemask_epi8(t(a)) as u16 };
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
unimplemented!()
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
return unsafe { u8x16_bitmask(t(a)) };
}
{
let _ = a;
unreachable!()
}
}
#[inline(always)]
fn u8x16_reduce_max(self, a: V128) -> u8 {
if matches_isa!(Self, SSE41 | WASM128) {
unimplemented!()
}
#[cfg(all(feature = "unstable", target_arch = "arm"))]
if matches_isa!(Self, NEON) {
unimplemented!()
}
#[cfg(target_arch = "aarch64")]
if matches_isa!(Self, NEON) {
return unsafe { vmaxvq_u8(t(a)) };
}
{
let _ = a;
unreachable!()
}
}
#[inline(always)]
fn u8x16_reduce_min(self, a: V128) -> u8 {
if matches_isa!(Self, SSE41 | WASM128) {
unimplemented!()
}
#[cfg(all(feature = "unstable", target_arch = "arm"))]
if matches_isa!(Self, NEON) {
unimplemented!()
}
#[cfg(target_arch = "aarch64")]
if matches_isa!(Self, NEON) {
return unsafe { vminvq_u8(t(a)) };
}
{
let _ = a;
unreachable!()
}
}
#[inline(always)]
fn v128_bsl(self, a: V128, b: V128, c: V128) -> V128 {
if matches_isa!(Self, SSE2 | WASM128) {
return self.v128_xor(self.v128_and(self.v128_xor(b, c), a), c);
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vbslq_u8(t(a), t(b), t(c))) };
}
{
let _ = (a, b, c);
unreachable!()
}
}
#[inline(always)]
fn u8x16_zip_lo(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_unpacklo_epi8(t(a), t(b))) };
}
#[cfg(all(feature = "unstable", target_arch = "arm"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vzipq_u8(t(a), t(b)).0) };
}
#[cfg(target_arch = "aarch64")]
if matches_isa!(Self, NEON) {
return unsafe { t(vzip1q_u8(t(a), t(b))) };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
let (a, b) = unsafe { (t(a), t(b)) };
let ans = u8x16_shuffle::<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23>(a, b);
return unsafe { t(ans) };
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn u8x16_zip_hi(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_unpackhi_epi8(t(a), t(b))) };
}
#[cfg(all(feature = "unstable", target_arch = "arm"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vzipq_u8(t(a), t(b)).1) };
}
#[cfg(target_arch = "aarch64")]
if matches_isa!(Self, NEON) {
return unsafe { t(vzip2q_u8(t(a), t(b))) };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
let (a, b) = unsafe { (t(a), t(b)) };
let ans = u8x16_shuffle::<8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>(a, b);
return unsafe { t(ans) };
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn u16x8_zip_lo(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_unpacklo_epi16(t(a), t(b))) };
}
#[cfg(all(feature = "unstable", target_arch = "arm"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vzipq_u16(t(a), t(b)).0) };
}
#[cfg(target_arch = "aarch64")]
if matches_isa!(Self, NEON) {
return unsafe { t(vzip1q_u16(t(a), t(b))) };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
let (a, b) = unsafe { (t(a), t(b)) };
let ans = u16x8_shuffle::<0, 8, 1, 9, 2, 10, 3, 11>(a, b);
return unsafe { t(ans) };
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn u16x8_zip_hi(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_unpackhi_epi16(t(a), t(b))) };
}
#[cfg(all(feature = "unstable", target_arch = "arm"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vzipq_u16(t(a), t(b)).1) };
}
#[cfg(target_arch = "aarch64")]
if matches_isa!(Self, NEON) {
return unsafe { t(vzip2q_u16(t(a), t(b))) };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
let (a, b) = unsafe { (t(a), t(b)) };
let ans = u16x8_shuffle::<4, 12, 5, 13, 6, 14, 7, 15>(a, b);
return unsafe { t(ans) };
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn u32x4_zip_lo(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_unpacklo_epi32(t(a), t(b))) };
}
#[cfg(all(feature = "unstable", target_arch = "arm"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vzipq_u32(t(a), t(b)).0) };
}
#[cfg(target_arch = "aarch64")]
if matches_isa!(Self, NEON) {
return unsafe { t(vzip1q_u32(t(a), t(b))) };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
let (a, b) = unsafe { (t(a), t(b)) };
let ans = u32x4_shuffle::<0, 4, 1, 5>(a, b);
return unsafe { t(ans) };
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn u32x4_zip_hi(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_unpackhi_epi32(t(a), t(b))) };
}
#[cfg(all(feature = "unstable", target_arch = "arm"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vzipq_u32(t(a), t(b)).1) };
}
#[cfg(target_arch = "aarch64")]
if matches_isa!(Self, NEON) {
return unsafe { t(vzip2q_u32(t(a), t(b))) };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
let (a, b) = unsafe { (t(a), t(b)) };
let ans = u32x4_shuffle::<2, 6, 3, 7>(a, b);
return unsafe { t(ans) };
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn u64x2_zip_lo(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_unpacklo_epi64(t(a), t(b))) };
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return unsafe {
let (a, b): ([u64; 2], [u64; 2]) = (t(a), t(b));
t([a[0], b[0]])
};
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
let (a, b) = unsafe { (t(a), t(b)) };
let ans = u64x2_shuffle::<0, 2>(a, b);
return unsafe { t(ans) };
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn u64x2_zip_hi(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_unpackhi_epi64(t(a), t(b))) };
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return unsafe {
let (a, b): ([u64; 2], [u64; 2]) = (t(a), t(b));
t([a[1], b[1]])
};
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
let (a, b) = unsafe { (t(a), t(b)) };
let ans = u64x2_shuffle::<1, 3>(a, b);
return unsafe { t(ans) };
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn u8x16_unzip_even(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
unimplemented!()
}
#[cfg(all(feature = "unstable", target_arch = "arm"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vuzpq_u8(t(a), t(b)).0) };
}
#[cfg(target_arch = "aarch64")]
if matches_isa!(Self, NEON) {
return unsafe { t(vuzp1q_u8(t(a), t(b))) };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
let (a, b) = unsafe { (t(a), t(b)) };
let ans = u8x16_shuffle::<0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30>(a, b);
return unsafe { t(ans) };
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn u8x16_unzip_odd(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
unimplemented!()
}
#[cfg(all(feature = "unstable", target_arch = "arm"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vuzpq_u8(t(a), t(b)).1) };
}
#[cfg(target_arch = "aarch64")]
if matches_isa!(Self, NEON) {
return unsafe { t(vuzp2q_u8(t(a), t(b))) };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
let (a, b) = unsafe { (t(a), t(b)) };
let ans = u8x16_shuffle::<1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>(a, b);
return unsafe { t(ans) };
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn u16x8_mul_hi(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_mulhi_epu16(t(a), t(b))) };
}
if matches_isa!(Self, NEON | WASM128) {
unimplemented!()
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn i16x8_mul_hi(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_mulhi_epi16(t(a), t(b))) };
}
if matches_isa!(Self, NEON | WASM128) {
unimplemented!()
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn i16x8_maddubs(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSSE3) {
return unsafe { t(_mm_maddubs_epi16(t(a), t(b))) };
}
if matches_isa!(Self, NEON | WASM128) {
unimplemented!()
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn u16x8_blend<const IMM8: i32>(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE41) {
return unsafe { t(_mm_blend_epi16::<IMM8>(t(a), t(b))) };
}
if matches_isa!(Self, NEON | WASM128) {
unimplemented!()
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn u8x16_blendv(self, a: V128, b: V128, c: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE41) {
return unsafe { t(_mm_blendv_epi8(t(a), t(b), t(c))) };
}
if matches_isa!(Self, NEON | WASM128) {
unimplemented!()
}
{
let _ = (a, b, c);
unreachable!()
}
}
#[inline(always)]
fn i16x8_madd(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_madd_epi16(t(a), t(b))) };
}
if matches_isa!(Self, NEON | WASM128) {
unimplemented!()
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn u8x16_avgr(self, a: V128, b: V128) -> V128 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_avg_epu8(t(a), t(b))) };
}
#[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
if matches_isa!(Self, NEON) {
return unsafe { t(vrhaddq_u8(t(a), t(b))) };
}
#[cfg(target_arch = "wasm32")]
if matches_isa!(Self, WASM128) {
return unsafe { t(u8x16_avgr(t(a), t(b))) };
}
{
let _ = (a, b);
unreachable!()
}
}
#[inline(always)]
fn i8x16_add_sat(self, a: V128, b: V128) -> V128 {
unified::add_sat::<_, i8, _>(self, a, b)
}
#[inline(always)]
fn u8x16_add_sat(self, a: V128, b: V128) -> V128 {
unified::add_sat::<_, u8, _>(self, a, b)
}
#[inline(always)]
fn i16x8_packus(self, a: V128, b: V128) -> V128 {
if cfg!(miri) {
return crate::simulation::i16x8_packus(a, b);
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
if matches_isa!(Self, SSE2) {
return unsafe { t(_mm_packus_epi16(t(a), t(b))) };
}
{
let _ = (a, b);
unreachable!()
}
}
}