use crate::isa::{NEON, SSE2, SSE41, WASM128};
use crate::unified;
use crate::vector::V128;
use crate::SIMD64;

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use crate::isa::SSSE3;

#[cfg(any(
    any(target_arch = "x86", target_arch = "x86_64"),
    any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"),
    target_arch = "wasm32"
))]
use core::mem::transmute as t;

#[cfg(target_arch = "x86")]
use core::arch::x86::*;

#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(all(feature = "unstable", target_arch = "arm"))]
use core::arch::arm::*;

#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::*;

#[cfg(target_arch = "wasm32")]
use core::arch::wasm32::*;

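/// 128-bit SIMD vector operations.
///
/// Each method dispatches on the implementor's ISA via `matches_isa!`:
/// SSE2/SSSE3/SSE4.1 on x86, NEON on ARM/AArch64, and SIMD128 on WASM.
/// When no supported ISA matches, the fallback block is `unreachable!()`.
///
/// # Safety
/// The trait is `unsafe` because its methods call raw architecture
/// intrinsics; an implementor must only claim ISAs that are actually
/// available at runtime.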
pub unsafe trait SIMD128: SIMD64 {
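    /// Loads 16 bytes from `addr` into a vector.
    ///
    /// # Safety
    /// `addr` must be valid for reads of 16 bytes and 16-byte aligned;
    /// alignment is checked in debug builds by `debug_assert_ptr_align!`.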
    #[inline(always)]
    unsafe fn v128_load(self, addr: *const u8) -> V128 {
        debug_assert_ptr_align!(addr, 16);

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return t(_mm_load_si128(addr.cast()));
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return self.v128_load_unaligned(addr);
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return self.v128_load_unaligned(addr);
        }
        {
            let _ = addr;
            unreachable!()
        }
    }

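    /// Loads 16 bytes from `addr` without any alignment requirement.
    ///
    /// # Safety
    /// `addr` must be valid for reads of 16 bytes.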
    #[inline(always)]
    unsafe fn v128_load_unaligned(self, addr: *const u8) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return t(_mm_loadu_si128(addr.cast()));
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return t(vld1q_u8(addr.cast()));
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return t(v128_load(addr.cast()));
        }
        {
            let _ = addr;
            unreachable!()
        }
    }

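    /// Stores the 16 bytes of `a` to `addr`.
    ///
    /// # Safety
    /// `addr` must be valid for writes of 16 bytes and 16-byte aligned;
    /// alignment is checked in debug builds by `debug_assert_ptr_align!`.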
    #[inline(always)]
    unsafe fn v128_store(self, addr: *mut u8, a: V128) {
        debug_assert_ptr_align!(addr, 16);

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return _mm_store_si128(addr.cast(), t(a));
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return self.v128_store_unaligned(addr, a);
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return self.v128_store_unaligned(addr, a);
        }
        {
            let _ = (addr, a);
            unreachable!()
        }
    }

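    /// Stores the 16 bytes of `a` to `addr` without any alignment
    /// requirement.
    ///
    /// # Safety
    /// `addr` must be valid for writes of 16 bytes.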
    #[inline(always)]
    unsafe fn v128_store_unaligned(self, addr: *mut u8, a: V128) {
        if cfg!(miri) {
            return addr.cast::<V128>().write_unaligned(a);
        }
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return _mm_storeu_si128(addr.cast(), t(a));
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return vst1q_u8(addr.cast(), t(a));
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return v128_store(addr.cast(), t(a));
        }
        {
            let _ = (addr, a);
            unreachable!()
        }
    }

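    /// Returns an all-zero vector.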
    #[inline(always)]
    fn v128_create_zero(self) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_setzero_si128()) };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vdupq_n_u8(0)) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(u8x16_splat(0)) };
        }
        {
            unreachable!()
        }
    }

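    /// Computes the bitwise NOT of `a`.
    ///
    /// SSE2 has no NOT instruction, so it is emulated as
    /// `a ^ cmpeq(a, a)` (XOR with all-ones).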
    #[inline(always)]
    fn v128_not(self, a: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe {
                let a = t(a);
                t(_mm_xor_si128(a, _mm_cmpeq_epi8(a, a)))
            };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vmvnq_u8(t(a))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(v128_not(t(a))) };
        }
        {
            let _ = a;
            unreachable!()
        }
    }

    #[inline(always)]
    fn v128_and(self, a: V128, b: V128) -> V128 {
        unified::and(self, a, b)
    }

    #[inline(always)]
    fn v128_or(self, a: V128, b: V128) -> V128 {
        unified::or(self, a, b)
    }

    #[inline(always)]
    fn v128_xor(self, a: V128, b: V128) -> V128 {
        unified::xor(self, a, b)
    }

    #[inline(always)]
    fn v128_andnot(self, a: V128, b: V128) -> V128 {
        unified::andnot(self, a, b)
    }

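    /// Returns `true` if every bit of `a` is zero.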
    #[inline(always)]
    fn v128_all_zero(self, a: V128) -> bool {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE41) {
            return unsafe {
                let a = t(a);
                _mm_testz_si128(a, a) != 0
            };
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            return unsafe {
                let a1: uint32x2x2_t = t(a);
                let a2: uint32x2_t = vorr_u32(a1.0, a1.1);
                vget_lane_u32::<0>(vpmax_u32(a2, a2)) == 0
            };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { vmaxvq_u8(t(a)) == 0 };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { !v128_any_true(t(a)) };
        }
        {
            let _ = a;
            unreachable!()
        }
    }

    #[inline(always)]
    fn u8x16_splat(self, x: u8) -> V128 {
        unified::splat(self, x)
    }

    #[inline(always)]
    fn u16x8_splat(self, x: u16) -> V128 {
        unified::splat(self, x)
    }

    #[inline(always)]
    fn u32x4_splat(self, x: u32) -> V128 {
        unified::splat(self, x)
    }

    #[inline(always)]
    fn u64x2_splat(self, x: u64) -> V128 {
        unified::splat(self, x)
    }

    #[inline(always)]
    fn i8x16_splat(self, x: i8) -> V128 {
        unified::splat(self, x)
    }

    #[inline(always)]
    fn i16x8_splat(self, x: i16) -> V128 {
        unified::splat(self, x)
    }

    #[inline(always)]
    fn i32x4_splat(self, x: i32) -> V128 {
        unified::splat(self, x)
    }

    #[inline(always)]
    fn i64x2_splat(self, x: i64) -> V128 {
        unified::splat(self, x)
    }

    #[inline(always)]
    fn u8x16_add(self, a: V128, b: V128) -> V128 {
        unified::add::<_, u8, _>(self, a, b)
    }

    #[inline(always)]
    fn u16x8_add(self, a: V128, b: V128) -> V128 {
        unified::add::<_, u16, _>(self, a, b)
    }

    #[inline(always)]
    fn u32x4_add(self, a: V128, b: V128) -> V128 {
        unified::add::<_, u32, _>(self, a, b)
    }

    #[inline(always)]
    fn u64x2_add(self, a: V128, b: V128) -> V128 {
        unified::add::<_, u64, _>(self, a, b)
    }

    #[inline(always)]
    fn u8x16_sub(self, a: V128, b: V128) -> V128 {
        unified::sub::<_, u8, _>(self, a, b)
    }

    #[inline(always)]
    fn u16x8_sub(self, a: V128, b: V128) -> V128 {
        unified::sub::<_, u16, _>(self, a, b)
    }

    #[inline(always)]
    fn u32x4_sub(self, a: V128, b: V128) -> V128 {
        unified::sub::<_, u32, _>(self, a, b)
    }

    #[inline(always)]
    fn u64x2_sub(self, a: V128, b: V128) -> V128 {
        unified::sub::<_, u64, _>(self, a, b)
    }

    #[inline(always)]
    fn u8x16_sub_sat(self, a: V128, b: V128) -> V128 {
        unified::sub_sat::<_, u8, _>(self, a, b)
    }

    #[inline(always)]
    fn u16x8_sub_sat(self, a: V128, b: V128) -> V128 {
        unified::sub_sat::<_, u16, _>(self, a, b)
    }

    #[inline(always)]
    fn i8x16_sub_sat(self, a: V128, b: V128) -> V128 {
        unified::sub_sat::<_, i8, _>(self, a, b)
    }

    #[inline(always)]
    fn i16x8_sub_sat(self, a: V128, b: V128) -> V128 {
        unified::sub_sat::<_, i16, _>(self, a, b)
    }

    #[inline(always)]
    fn i16x8_mul_lo(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_mullo_epi16(t(a), t(b))) };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vmulq_s16(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(i16x8_mul(t(a), t(b))) };
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

    #[inline(always)]
    fn i32x4_mul_lo(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE41) {
            return unsafe { t(_mm_mullo_epi32(t(a), t(b))) };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vmulq_s32(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(i32x4_mul(t(a), t(b))) };
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

    #[inline(always)]
    fn u16x8_shl<const IMM8: i32>(self, a: V128) -> V128 {
        if cfg!(miri) {
            return crate::simulation::u16x8_shl(a, IMM8 as u8);
        }
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_slli_epi16::<IMM8>(t(a))) };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vshlq_n_u16::<IMM8>(t(a))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(u16x8_shl(t(a), IMM8 as u32)) };
        }
        {
            let _ = a;
            unreachable!()
        }
    }

    #[inline(always)]
    fn u32x4_shl<const IMM8: i32>(self, a: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_slli_epi32::<IMM8>(t(a))) };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vshlq_n_u32::<IMM8>(t(a))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(u32x4_shl(t(a), IMM8 as u32)) };
        }
        {
            let _ = a;
            unreachable!()
        }
    }

    #[inline(always)]
    fn u16x8_shr<const IMM8: i32>(self, a: V128) -> V128 {
        if cfg!(miri) {
            return crate::simulation::u16x8_shr(a, IMM8 as u8);
        }
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_srli_epi16::<IMM8>(t(a))) };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vshrq_n_u16::<IMM8>(t(a))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(u16x8_shr(t(a), IMM8 as u32)) };
        }
        {
            let _ = a;
            unreachable!()
        }
    }

    #[inline(always)]
    fn u32x4_shr<const IMM8: i32>(self, a: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_srli_epi32::<IMM8>(t(a))) };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vshrq_n_u32::<IMM8>(t(a))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(u32x4_shr(t(a), IMM8 as u32)) };
        }
        {
            let _ = a;
            unreachable!()
        }
    }

    #[inline(always)]
    fn u8x16_eq(self, a: V128, b: V128) -> V128 {
        unified::eq::<_, u8, _>(self, a, b)
    }

    #[inline(always)]
    fn u16x8_eq(self, a: V128, b: V128) -> V128 {
        unified::eq::<_, u16, _>(self, a, b)
    }

    #[inline(always)]
    fn u32x4_eq(self, a: V128, b: V128) -> V128 {
        unified::eq::<_, u32, _>(self, a, b)
    }

    #[inline(always)]
    fn u8x16_lt(self, a: V128, b: V128) -> V128 {
        unified::lt::<_, u8, _>(self, a, b)
    }

    #[inline(always)]
    fn u16x8_lt(self, a: V128, b: V128) -> V128 {
        unified::lt::<_, u16, _>(self, a, b)
    }

    #[inline(always)]
    fn u32x4_lt(self, a: V128, b: V128) -> V128 {
        unified::lt::<_, u32, _>(self, a, b)
    }

    #[inline(always)]
    fn i8x16_lt(self, a: V128, b: V128) -> V128 {
        unified::lt::<_, i8, _>(self, a, b)
    }

    #[inline(always)]
    fn i16x8_lt(self, a: V128, b: V128) -> V128 {
        unified::lt::<_, i16, _>(self, a, b)
    }

    #[inline(always)]
    fn i32x4_lt(self, a: V128, b: V128) -> V128 {
        unified::lt::<_, i32, _>(self, a, b)
    }

    #[inline(always)]
    fn u8x16_max(self, a: V128, b: V128) -> V128 {
        unified::max::<_, u8, _>(self, a, b)
    }

    #[inline(always)]
    fn u16x8_max(self, a: V128, b: V128) -> V128 {
        unified::max::<_, u16, _>(self, a, b)
    }

    #[inline(always)]
    fn u32x4_max(self, a: V128, b: V128) -> V128 {
        unified::max::<_, u32, _>(self, a, b)
    }

    #[inline(always)]
    fn i8x16_max(self, a: V128, b: V128) -> V128 {
        unified::max::<_, i8, _>(self, a, b)
    }

    #[inline(always)]
    fn i16x8_max(self, a: V128, b: V128) -> V128 {
        unified::max::<_, i16, _>(self, a, b)
    }

    #[inline(always)]
    fn i32x4_max(self, a: V128, b: V128) -> V128 {
        unified::max::<_, i32, _>(self, a, b)
    }

    #[inline(always)]
    fn u8x16_min(self, a: V128, b: V128) -> V128 {
        unified::min::<_, u8, _>(self, a, b)
    }

    #[inline(always)]
    fn u16x8_min(self, a: V128, b: V128) -> V128 {
        unified::min::<_, u16, _>(self, a, b)
    }

    #[inline(always)]
    fn u32x4_min(self, a: V128, b: V128) -> V128 {
        unified::min::<_, u32, _>(self, a, b)
    }

    #[inline(always)]
    fn i8x16_min(self, a: V128, b: V128) -> V128 {
        unified::min::<_, i8, _>(self, a, b)
    }

    #[inline(always)]
    fn i16x8_min(self, a: V128, b: V128) -> V128 {
        unified::min::<_, i16, _>(self, a, b)
    }

    #[inline(always)]
    fn i32x4_min(self, a: V128, b: V128) -> V128 {
        unified::min::<_, i32, _>(self, a, b)
    }

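    /// Byte-level table lookup: output byte `i` is `a[b[i]]`.
    ///
    /// The exact handling of out-of-range indices follows the underlying
    /// instruction (`pshufb` keys off the high bit of the index byte,
    /// while the NEON and WASM lookups zero any index >= 16).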
    #[inline(always)]
    fn u8x16_swizzle(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSSE3) {
            return unsafe { t(_mm_shuffle_epi8(t(a), t(b))) };
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            return unsafe {
                let (a, b) = (t(a), t(b));
                let a = uint8x8x2_t(vget_low_u8(a), vget_high_u8(a));
                let b = (vget_low_u8(b), vget_high_u8(b));
                let c = (vtbl2_u8(a, b.0), vtbl2_u8(a, b.1));
                t([c.0, c.1])
            };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vqtbl1q_u8(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(u8x16_swizzle(t(a), t(b))) };
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

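    /// Reverses the byte order within each 16-bit lane.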
    #[inline(always)]
    fn u16x8_bswap(self, a: V128) -> V128 {
        if matches_isa!(Self, SSE41 | WASM128) {
            return self.u8x16_swizzle(a, crate::bswap::SHUFFLE_U16X8);
        }

        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vrev16q_u8(t(a))) };
        }

        {
            let _ = a;
            unreachable!()
        }
    }

    #[inline(always)]
    fn u32x4_bswap(self, a: V128) -> V128 {
        if matches_isa!(Self, SSE41 | WASM128) {
            return self.u8x16_swizzle(a, crate::bswap::SHUFFLE_U32X4);
        }

        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vrev32q_u8(t(a))) };
        }

        {
            let _ = a;
            unreachable!()
        }
    }

    #[inline(always)]
    fn u64x2_bswap(self, a: V128) -> V128 {
        if matches_isa!(Self, SSE41 | WASM128) {
            return self.u8x16_swizzle(a, crate::bswap::SHUFFLE_U64X2);
        }

        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vrev64q_u8(t(a))) };
        }

        {
            let _ = a;
            unreachable!()
        }
    }

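    /// Returns `true` if any byte of `a` equals zero.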
    #[inline(always)]
    fn u8x16_any_zero(self, a: V128) -> bool {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            let is_zero = self.u8x16_eq(a, self.v128_create_zero());
            return self.u8x16_bitmask(is_zero) != 0;
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            return unsafe {
                let a: uint8x8x2_t = t(a);
                let a = vpmin_u8(a.0, a.1);
                let m: u64 = t(vtst_u8(a, a));
                m != u64::MAX
            };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { vminvq_u8(t(a)) == 0 };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { !u8x16_all_true(t(a)) };
        }
        {
            let _ = a;
            unreachable!()
        }
    }

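    /// Collects the most significant bit of each byte into a 16-bit mask
    /// (bit `i` comes from byte `i`). The NEON paths are currently
    /// `unimplemented!()`.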
    #[inline(always)]
    fn u8x16_bitmask(self, a: V128) -> u16 {
        if cfg!(miri) {
            return crate::simulation::u8x16_bitmask(a);
        }
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { _mm_movemask_epi8(t(a)) as u16 };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            unimplemented!()
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { u8x16_bitmask(t(a)) };
        }
        {
            let _ = a;
            unreachable!()
        }
    }

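    /// Horizontal maximum over all 16 bytes of `a`. Only the AArch64 path
    /// (`vmaxvq_u8`) is implemented; the other ISAs are currently
    /// `unimplemented!()`.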
    #[inline(always)]
    fn u8x16_reduce_max(self, a: V128) -> u8 {
        if matches_isa!(Self, SSE41 | WASM128) {
            unimplemented!()
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            unimplemented!()
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { vmaxvq_u8(t(a)) };
        }
        {
            let _ = a;
            unreachable!()
        }
    }

    #[inline(always)]
    fn u8x16_reduce_min(self, a: V128) -> u8 {
        if matches_isa!(Self, SSE41 | WASM128) {
            unimplemented!()
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            unimplemented!()
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { vminvq_u8(t(a)) };
        }
        {
            let _ = a;
            unreachable!()
        }
    }

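    /// Bitwise select: picks each bit from `b` where `a` has a 1 and from
    /// `c` where `a` has a 0.
    ///
    /// On SSE2/WASM this is emulated as `((b ^ c) & a) ^ c`.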
    #[inline(always)]
    fn v128_bsl(self, a: V128, b: V128, c: V128) -> V128 {
        if matches_isa!(Self, SSE2 | WASM128) {
            return self.v128_xor(self.v128_and(self.v128_xor(b, c), a), c);
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vbslq_u8(t(a), t(b), t(c))) };
        }
        {
            let _ = (a, b, c);
            unreachable!()
        }
    }

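    /// Interleaves the low halves of `a` and `b`:
    /// `[a0, b0, a1, b1, ..., a7, b7]`.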
    #[inline(always)]
    fn u8x16_zip_lo(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_unpacklo_epi8(t(a), t(b))) };
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vzipq_u8(t(a), t(b)).0) };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vzip1q_u8(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u8x16_shuffle::<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23>(a, b);
            return unsafe { t(ans) };
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

    #[inline(always)]
    fn u8x16_zip_hi(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_unpackhi_epi8(t(a), t(b))) };
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vzipq_u8(t(a), t(b)).1) };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vzip2q_u8(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u8x16_shuffle::<8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>(a, b);
            return unsafe { t(ans) };
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

    #[inline(always)]
    fn u16x8_zip_lo(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_unpacklo_epi16(t(a), t(b))) };
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vzipq_u16(t(a), t(b)).0) };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vzip1q_u16(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u16x8_shuffle::<0, 8, 1, 9, 2, 10, 3, 11>(a, b);
            return unsafe { t(ans) };
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

    #[inline(always)]
    fn u16x8_zip_hi(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_unpackhi_epi16(t(a), t(b))) };
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vzipq_u16(t(a), t(b)).1) };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vzip2q_u16(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u16x8_shuffle::<4, 12, 5, 13, 6, 14, 7, 15>(a, b);
            return unsafe { t(ans) };
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

    #[inline(always)]
    fn u32x4_zip_lo(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_unpacklo_epi32(t(a), t(b))) };
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vzipq_u32(t(a), t(b)).0) };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vzip1q_u32(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u32x4_shuffle::<0, 4, 1, 5>(a, b);
            return unsafe { t(ans) };
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

    #[inline(always)]
    fn u32x4_zip_hi(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_unpackhi_epi32(t(a), t(b))) };
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vzipq_u32(t(a), t(b)).1) };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vzip2q_u32(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u32x4_shuffle::<2, 6, 3, 7>(a, b);
            return unsafe { t(ans) };
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

    #[inline(always)]
    fn u64x2_zip_lo(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_unpacklo_epi64(t(a), t(b))) };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe {
                let (a, b): ([u64; 2], [u64; 2]) = (t(a), t(b));
                t([a[0], b[0]])
            };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u64x2_shuffle::<0, 2>(a, b);
            return unsafe { t(ans) };
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

    #[inline(always)]
    fn u64x2_zip_hi(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_unpackhi_epi64(t(a), t(b))) };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe {
                let (a, b): ([u64; 2], [u64; 2]) = (t(a), t(b));
                t([a[1], b[1]])
            };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u64x2_shuffle::<1, 3>(a, b);
            return unsafe { t(ans) };
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

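    /// Gathers the even-indexed bytes of the concatenation of `a` and `b`.
    /// The SSE2 path is currently `unimplemented!()`.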
    #[inline(always)]
    fn u8x16_unzip_even(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            unimplemented!()
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vuzpq_u8(t(a), t(b)).0) };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vuzp1q_u8(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u8x16_shuffle::<0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30>(a, b);
            return unsafe { t(ans) };
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

    #[inline(always)]
    fn u8x16_unzip_odd(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            unimplemented!()
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vuzpq_u8(t(a), t(b)).1) };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vuzp2q_u8(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u8x16_shuffle::<1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>(a, b);
            return unsafe { t(ans) };
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

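    /// Returns the high 16 bits of each unsigned 16-bit product, i.e.
    /// `(a * b) >> 16` per lane. Only the SSE2 path is implemented.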
    #[inline(always)]
    fn u16x8_mul_hi(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_mulhi_epu16(t(a), t(b))) };
        }
        if matches_isa!(Self, NEON | WASM128) {
            unimplemented!()
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

    #[inline(always)]
    fn i16x8_mul_hi(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_mulhi_epi16(t(a), t(b))) };
        }
        if matches_isa!(Self, NEON | WASM128) {
            unimplemented!()
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

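    /// Multiplies unsigned bytes of `a` by signed bytes of `b` and adds
    /// adjacent pairs into saturated `i16` lanes (`pmaddubsw`). Only the
    /// SSSE3 path is implemented.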
    #[inline(always)]
    fn i16x8_maddubs(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSSE3) {
            return unsafe { t(_mm_maddubs_epi16(t(a), t(b))) };
        }
        if matches_isa!(Self, NEON | WASM128) {
            unimplemented!()
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

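    /// Per-lane blend: lane `i` is taken from `b` if bit `i` of `IMM8` is
    /// set, otherwise from `a`. Only the SSE4.1 path is implemented.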
    #[inline(always)]
    fn u16x8_blend<const IMM8: i32>(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE41) {
            return unsafe { t(_mm_blend_epi16::<IMM8>(t(a), t(b))) };
        }
        if matches_isa!(Self, NEON | WASM128) {
            unimplemented!()
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

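    /// Variable blend: byte `i` is taken from `b` if the most significant
    /// bit of `c[i]` is set, otherwise from `a`. Only the SSE4.1 path is
    /// implemented.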
    #[inline(always)]
    fn u8x16_blendv(self, a: V128, b: V128, c: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE41) {
            return unsafe { t(_mm_blendv_epi8(t(a), t(b), t(c))) };
        }
        if matches_isa!(Self, NEON | WASM128) {
            unimplemented!()
        }
        {
            let _ = (a, b, c);
            unreachable!()
        }
    }

    #[inline(always)]
    fn i16x8_madd(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_madd_epi16(t(a), t(b))) };
        }
        if matches_isa!(Self, NEON | WASM128) {
            unimplemented!()
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

    #[inline(always)]
    fn u8x16_avgr(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_avg_epu8(t(a), t(b))) };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vrhaddq_u8(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(u8x16_avgr(t(a), t(b))) };
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }

    #[inline(always)]
    fn i8x16_add_sat(self, a: V128, b: V128) -> V128 {
        unified::add_sat::<_, i8, _>(self, a, b)
    }

    #[inline(always)]
    fn u8x16_add_sat(self, a: V128, b: V128) -> V128 {
        unified::add_sat::<_, u8, _>(self, a, b)
    }

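    /// Packs the `i16` lanes of `a` and `b` into 16 bytes with unsigned
    /// saturation (miri uses a scalar simulation). Only the SSE2 path is
    /// implemented natively.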
    #[inline(always)]
    fn i16x8_packus(self, a: V128, b: V128) -> V128 {
        if cfg!(miri) {
            return crate::simulation::i16x8_packus(a, b);
        }
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_packus_epi16(t(a), t(b))) };
        }
        {
            let _ = (a, b);
            unreachable!()
        }
    }
}