//! `vsimd/simd128.rs` — portable 128-bit SIMD operations dispatched per ISA.

1use crate::isa::{NEON, SSE2, SSE41, WASM128};
2use crate::unified;
3use crate::vector::V128;
4use crate::SIMD64;
5
6#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
7use crate::isa::SSSE3;
8
9#[cfg(any(
10    any(target_arch = "x86", target_arch = "x86_64"),
11    any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"),
12    target_arch = "wasm32"
13))]
14use core::mem::transmute as t;
15
16#[cfg(target_arch = "x86")]
17use core::arch::x86::*;
18
19#[cfg(target_arch = "x86_64")]
20use core::arch::x86_64::*;
21
22#[cfg(all(feature = "unstable", target_arch = "arm"))]
23use core::arch::arm::*;
24
25#[cfg(target_arch = "aarch64")]
26use core::arch::aarch64::*;
27
28#[cfg(target_arch = "wasm32")]
29use core::arch::wasm32::*;
30
31pub unsafe trait SIMD128: SIMD64 {
    /// Loads 16 bytes from `addr` into a [`V128`].
    ///
    /// # Safety
    /// `addr` must be valid for a 16-byte read and 16-byte aligned.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    unsafe fn v128_load(self, addr: *const u8) -> V128 {
        debug_assert_ptr_align!(addr, 16);

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return t(_mm_load_si128(addr.cast()));
        }
        // NEON/WASM expose no dedicated aligned-load intrinsic; the
        // unaligned load is used (equally fast when the pointer is aligned).
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return self.v128_load_unaligned(addr);
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return self.v128_load_unaligned(addr);
        }
        // Fallthrough: no ISA branch matched; unreachable for any
        // implementor on the supported targets.
        {
            let _ = addr;
            unreachable!()
        }
    }
54
55    /// T1: SSE2, NEON, WASM128
56    #[inline(always)]
57    unsafe fn v128_load_unaligned(self, addr: *const u8) -> V128 {
58        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
59        if matches_isa!(Self, SSE2) {
60            return t(_mm_loadu_si128(addr.cast()));
61        }
62        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
63        if matches_isa!(Self, NEON) {
64            return t(vld1q_u8(addr.cast()));
65        }
66        #[cfg(target_arch = "wasm32")]
67        if matches_isa!(Self, WASM128) {
68            return t(v128_load(addr.cast()));
69        }
70        {
71            let _ = addr;
72            unreachable!()
73        }
74    }
75
    /// Stores the 16 bytes of `a` to `addr`.
    ///
    /// # Safety
    /// `addr` must be valid for a 16-byte write and 16-byte aligned.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    unsafe fn v128_store(self, addr: *mut u8, a: V128) {
        debug_assert_ptr_align!(addr, 16);

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return _mm_store_si128(addr.cast(), t(a));
        }
        // NEON/WASM have no dedicated aligned store; reuse the unaligned path.
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return self.v128_store_unaligned(addr, a);
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return self.v128_store_unaligned(addr, a);
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = (addr, a);
            unreachable!()
        }
    }
98
    /// Stores the 16 bytes of `a` to `addr` (no alignment requirement).
    ///
    /// # Safety
    /// `addr` must be valid for a 16-byte write.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    unsafe fn v128_store_unaligned(self, addr: *mut u8, a: V128) {
        // Miri cannot execute vendor intrinsics; use a plain unaligned write.
        if cfg!(miri) {
            return addr.cast::<V128>().write_unaligned(a);
        }
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return _mm_storeu_si128(addr.cast(), t(a));
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return vst1q_u8(addr.cast(), t(a));
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return v128_store(addr.cast(), t(a));
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = (addr, a);
            unreachable!()
        }
    }
122
    /// Returns an all-zero vector.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn v128_create_zero(self) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_setzero_si128()) };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vdupq_n_u8(0)) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(u8x16_splat(0)) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            unreachable!()
        }
    }
142
    /// Bitwise NOT of the whole vector.
    ///
    /// T1: NEON, WASM128
    ///
    /// T2: SSE2
    #[inline(always)]
    fn v128_not(self, a: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            // SSE2 has no NOT; XOR with all-ones (`cmpeq(a, a)` is all-ones).
            return unsafe {
                let a = t(a);
                t(_mm_xor_si128(a, _mm_cmpeq_epi8(a, a)))
            };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vmvnq_u8(t(a))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(v128_not(t(a))) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = a;
            unreachable!()
        }
    }
168
    /// Bitwise AND; delegates to [`unified::and`].
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn v128_and(self, a: V128, b: V128) -> V128 {
        unified::and(self, a, b)
    }
174
    /// Bitwise OR; delegates to [`unified::or`].
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn v128_or(self, a: V128, b: V128) -> V128 {
        unified::or(self, a, b)
    }
180
    /// Bitwise XOR; delegates to [`unified::xor`].
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn v128_xor(self, a: V128, b: V128) -> V128 {
        unified::xor(self, a, b)
    }
186
    /// Bitwise AND-NOT of `a` and `b` (operand order per [`unified::andnot`]).
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn v128_andnot(self, a: V128, b: V128) -> V128 {
        unified::andnot(self, a, b)
    }
192
    /// Returns `true` iff every bit of the vector is zero.
    ///
    /// T1: SSE41, NEON-A64, WASM128
    ///
    /// T2: NEON-A32
    #[inline(always)]
    fn v128_all_zero(self, a: V128) -> bool {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE41) {
            // `ptest`: ZF is set iff (a & a) == 0.
            return unsafe {
                let a = t(a);
                _mm_testz_si128(a, a) != 0
            };
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            // A32 lacks across-vector reductions: fold 128 -> 64 bits with
            // OR, then reduce with a pairwise max and test lane 0.
            return unsafe {
                let a1: uint32x2x2_t = t(a);
                let a2: uint32x2_t = vorr_u32(a1.0, a1.1);
                vget_lane_u32::<0>(vpmax_u32(a2, a2)) == 0
            };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            // Across-vector max is zero only when all bytes are zero.
            return unsafe { vmaxvq_u8(t(a)) == 0 };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { !v128_any_true(t(a)) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = a;
            unreachable!()
        }
    }
226
    /// Broadcasts `x` into all 16 `u8` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u8x16_splat(self, x: u8) -> V128 {
        unified::splat(self, x)
    }
232
    /// Broadcasts `x` into all 8 `u16` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u16x8_splat(self, x: u16) -> V128 {
        unified::splat(self, x)
    }
238
    /// Broadcasts `x` into all 4 `u32` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u32x4_splat(self, x: u32) -> V128 {
        unified::splat(self, x)
    }
244
    /// Broadcasts `x` into both `u64` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u64x2_splat(self, x: u64) -> V128 {
        unified::splat(self, x)
    }
250
    /// Broadcasts `x` into all 16 `i8` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn i8x16_splat(self, x: i8) -> V128 {
        unified::splat(self, x)
    }
256
    /// Broadcasts `x` into all 8 `i16` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn i16x8_splat(self, x: i16) -> V128 {
        unified::splat(self, x)
    }
262
    /// Broadcasts `x` into all 4 `i32` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn i32x4_splat(self, x: i32) -> V128 {
        unified::splat(self, x)
    }
268
    /// Broadcasts `x` into both `i64` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn i64x2_splat(self, x: i64) -> V128 {
        unified::splat(self, x)
    }
274
    /// Lane-wise wrapping addition of 16 `u8` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u8x16_add(self, a: V128, b: V128) -> V128 {
        unified::add::<_, u8, _>(self, a, b)
    }
280
    /// Lane-wise wrapping addition of 8 `u16` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u16x8_add(self, a: V128, b: V128) -> V128 {
        unified::add::<_, u16, _>(self, a, b)
    }
286
    /// Lane-wise wrapping addition of 4 `u32` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u32x4_add(self, a: V128, b: V128) -> V128 {
        unified::add::<_, u32, _>(self, a, b)
    }
292
    /// Lane-wise wrapping addition of 2 `u64` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u64x2_add(self, a: V128, b: V128) -> V128 {
        unified::add::<_, u64, _>(self, a, b)
    }
298
    /// Lane-wise wrapping subtraction of 16 `u8` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u8x16_sub(self, a: V128, b: V128) -> V128 {
        unified::sub::<_, u8, _>(self, a, b)
    }
304
    /// Lane-wise wrapping subtraction of 8 `u16` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u16x8_sub(self, a: V128, b: V128) -> V128 {
        unified::sub::<_, u16, _>(self, a, b)
    }
310
    /// Lane-wise wrapping subtraction of 4 `u32` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u32x4_sub(self, a: V128, b: V128) -> V128 {
        unified::sub::<_, u32, _>(self, a, b)
    }
316
    /// Lane-wise wrapping subtraction of 2 `u64` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u64x2_sub(self, a: V128, b: V128) -> V128 {
        unified::sub::<_, u64, _>(self, a, b)
    }
322
    /// Lane-wise saturating subtraction of 16 `u8` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u8x16_sub_sat(self, a: V128, b: V128) -> V128 {
        unified::sub_sat::<_, u8, _>(self, a, b)
    }
328
    /// Lane-wise saturating subtraction of 8 `u16` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u16x8_sub_sat(self, a: V128, b: V128) -> V128 {
        unified::sub_sat::<_, u16, _>(self, a, b)
    }
334
    /// Lane-wise saturating subtraction of 16 `i8` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn i8x16_sub_sat(self, a: V128, b: V128) -> V128 {
        unified::sub_sat::<_, i8, _>(self, a, b)
    }
340
    /// Lane-wise saturating subtraction of 8 `i16` lanes.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn i16x8_sub_sat(self, a: V128, b: V128) -> V128 {
        unified::sub_sat::<_, i16, _>(self, a, b)
    }
346
    /// Lane-wise `i16` multiplication keeping the low 16 bits of each product.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn i16x8_mul_lo(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_mullo_epi16(t(a), t(b))) };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vmulq_s16(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(i16x8_mul(t(a), t(b))) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
367
    /// Lane-wise `i32` multiplication keeping the low 32 bits of each product.
    ///
    /// T1: SSE41, NEON, WASM128
    #[inline(always)]
    fn i32x4_mul_lo(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE41) {
            // `_mm_mullo_epi32` requires SSE4.1 (not available in plain SSE2).
            return unsafe { t(_mm_mullo_epi32(t(a), t(b))) };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vmulq_s32(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(i32x4_mul(t(a), t(b))) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
388
    /// Shifts each of the 8 `u16` lanes left by the constant `IMM8` bits.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u16x8_shl<const IMM8: i32>(self, a: V128) -> V128 {
        // Miri cannot execute vendor intrinsics; use the scalar simulation.
        if cfg!(miri) {
            return crate::simulation::u16x8_shl(a, IMM8 as u8);
        }
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_slli_epi16::<IMM8>(t(a))) };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vshlq_n_u16::<IMM8>(t(a))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(u16x8_shl(t(a), IMM8 as u32)) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = a;
            unreachable!()
        }
    }
412
    /// Shifts each of the 4 `u32` lanes left by the constant `IMM8` bits.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u32x4_shl<const IMM8: i32>(self, a: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_slli_epi32::<IMM8>(t(a))) };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vshlq_n_u32::<IMM8>(t(a))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(u32x4_shl(t(a), IMM8 as u32)) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = a;
            unreachable!()
        }
    }
433
    /// Shifts each of the 8 `u16` lanes right (logical) by the constant `IMM8` bits.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u16x8_shr<const IMM8: i32>(self, a: V128) -> V128 {
        // Miri cannot execute vendor intrinsics; use the scalar simulation.
        if cfg!(miri) {
            return crate::simulation::u16x8_shr(a, IMM8 as u8);
        }
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_srli_epi16::<IMM8>(t(a))) };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vshrq_n_u16::<IMM8>(t(a))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(u16x8_shr(t(a), IMM8 as u32)) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = a;
            unreachable!()
        }
    }
457
    /// Shifts each of the 4 `u32` lanes right (logical) by the constant `IMM8` bits.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u32x4_shr<const IMM8: i32>(self, a: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_srli_epi32::<IMM8>(t(a))) };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vshrq_n_u32::<IMM8>(t(a))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(u32x4_shr(t(a), IMM8 as u32)) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = a;
            unreachable!()
        }
    }
478
    /// Lane-wise `u8` equality compare producing a lane mask.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u8x16_eq(self, a: V128, b: V128) -> V128 {
        unified::eq::<_, u8, _>(self, a, b)
    }
484
    /// Lane-wise `u16` equality compare producing a lane mask.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u16x8_eq(self, a: V128, b: V128) -> V128 {
        unified::eq::<_, u16, _>(self, a, b)
    }
490
    /// Lane-wise `u32` equality compare producing a lane mask.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u32x4_eq(self, a: V128, b: V128) -> V128 {
        unified::eq::<_, u32, _>(self, a, b)
    }
496
    /// Lane-wise unsigned `u8` less-than (`a < b`) producing a lane mask.
    ///
    /// T1: NEON, WASM128
    ///
    /// T2: SSE2
    #[inline(always)]
    fn u8x16_lt(self, a: V128, b: V128) -> V128 {
        unified::lt::<_, u8, _>(self, a, b)
    }
504
    /// Lane-wise unsigned `u16` less-than (`a < b`) producing a lane mask.
    ///
    /// T1: NEON, WASM128
    ///
    /// T2: SSE2
    #[inline(always)]
    fn u16x8_lt(self, a: V128, b: V128) -> V128 {
        unified::lt::<_, u16, _>(self, a, b)
    }
512
    /// Lane-wise unsigned `u32` less-than (`a < b`) producing a lane mask.
    ///
    /// T1: NEON, WASM128
    ///
    /// T2: SSE2
    #[inline(always)]
    fn u32x4_lt(self, a: V128, b: V128) -> V128 {
        unified::lt::<_, u32, _>(self, a, b)
    }
520
    /// Lane-wise signed `i8` less-than (`a < b`) producing a lane mask.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn i8x16_lt(self, a: V128, b: V128) -> V128 {
        unified::lt::<_, i8, _>(self, a, b)
    }
526
    /// Lane-wise signed `i16` less-than (`a < b`) producing a lane mask.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn i16x8_lt(self, a: V128, b: V128) -> V128 {
        unified::lt::<_, i16, _>(self, a, b)
    }
532
    /// Lane-wise signed `i32` less-than (`a < b`) producing a lane mask.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn i32x4_lt(self, a: V128, b: V128) -> V128 {
        unified::lt::<_, i32, _>(self, a, b)
    }
538
    /// Lane-wise unsigned `u8` maximum.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u8x16_max(self, a: V128, b: V128) -> V128 {
        unified::max::<_, u8, _>(self, a, b)
    }
544
    /// Lane-wise unsigned `u16` maximum.
    ///
    /// T1: SSE41, NEON, WASM128
    #[inline(always)]
    fn u16x8_max(self, a: V128, b: V128) -> V128 {
        unified::max::<_, u16, _>(self, a, b)
    }
550
    /// Lane-wise unsigned `u32` maximum.
    ///
    /// T1: SSE41, NEON, WASM128
    #[inline(always)]
    fn u32x4_max(self, a: V128, b: V128) -> V128 {
        unified::max::<_, u32, _>(self, a, b)
    }
556
    /// Lane-wise signed `i8` maximum.
    ///
    /// T1: SSE41, NEON, WASM128
    #[inline(always)]
    fn i8x16_max(self, a: V128, b: V128) -> V128 {
        unified::max::<_, i8, _>(self, a, b)
    }
562
    /// Lane-wise signed `i16` maximum.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn i16x8_max(self, a: V128, b: V128) -> V128 {
        unified::max::<_, i16, _>(self, a, b)
    }
568
    /// Lane-wise signed `i32` maximum.
    ///
    /// T1: SSE41, NEON, WASM128
    #[inline(always)]
    fn i32x4_max(self, a: V128, b: V128) -> V128 {
        unified::max::<_, i32, _>(self, a, b)
    }
574
    /// Lane-wise unsigned `u8` minimum.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u8x16_min(self, a: V128, b: V128) -> V128 {
        unified::min::<_, u8, _>(self, a, b)
    }
580
    /// Lane-wise unsigned `u16` minimum.
    ///
    /// T1: SSE41, NEON, WASM128
    #[inline(always)]
    fn u16x8_min(self, a: V128, b: V128) -> V128 {
        unified::min::<_, u16, _>(self, a, b)
    }
586
    /// Lane-wise unsigned `u32` minimum.
    ///
    /// T1: SSE41, NEON, WASM128
    #[inline(always)]
    fn u32x4_min(self, a: V128, b: V128) -> V128 {
        unified::min::<_, u32, _>(self, a, b)
    }
592
    /// Lane-wise signed `i8` minimum.
    ///
    /// T1: SSE41, NEON, WASM128
    #[inline(always)]
    fn i8x16_min(self, a: V128, b: V128) -> V128 {
        unified::min::<_, i8, _>(self, a, b)
    }
598
    /// Lane-wise signed `i16` minimum.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn i16x8_min(self, a: V128, b: V128) -> V128 {
        unified::min::<_, i16, _>(self, a, b)
    }
604
    /// Lane-wise signed `i32` minimum.
    ///
    /// T1: SSE41, NEON, WASM128
    #[inline(always)]
    fn i32x4_min(self, a: V128, b: V128) -> V128 {
        unified::min::<_, i32, _>(self, a, b)
    }
610
    /// Byte-level table lookup (shuffle): each lane of `b` selects a byte of
    /// `a`. Out-of-range index handling follows each ISA's shuffle semantics.
    ///
    /// T1: SSSE3, NEON-A64, WASM128
    ///
    /// T2: NEON-A32
    #[inline(always)]
    fn u8x16_swizzle(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSSE3) {
            return unsafe { t(_mm_shuffle_epi8(t(a), t(b))) };
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            // A32 tables are 64-bit: split into a two-entry table and look up
            // each half of `b` separately, then recombine.
            return unsafe {
                let (a, b) = (t(a), t(b));
                let a = uint8x8x2_t(vget_low_u8(a), vget_high_u8(a));
                let b = (vget_low_u8(b), vget_high_u8(b));
                let c = (vtbl2_u8(a, b.0), vtbl2_u8(a, b.1));
                t([c.0, c.1])
            };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vqtbl1q_u8(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(u8x16_swizzle(t(a), t(b))) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
643
    /// Reverses the byte order within each `u16` lane.
    ///
    /// T1: SSE41, NEON, WASM128
    #[inline(always)]
    fn u16x8_bswap(self, a: V128) -> V128 {
        // SSE41/WASM: byte shuffle with a precomputed swap pattern.
        if matches_isa!(Self, SSE41 | WASM128) {
            return self.u8x16_swizzle(a, crate::bswap::SHUFFLE_U16X8);
        }

        // NEON has a dedicated 16-bit byte-reverse.
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vrev16q_u8(t(a))) };
        }

        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = a;
            unreachable!()
        }
    }
661
    /// Reverses the byte order within each `u32` lane.
    ///
    /// T1: SSE41, NEON, WASM128
    #[inline(always)]
    fn u32x4_bswap(self, a: V128) -> V128 {
        // SSE41/WASM: byte shuffle with a precomputed swap pattern.
        if matches_isa!(Self, SSE41 | WASM128) {
            return self.u8x16_swizzle(a, crate::bswap::SHUFFLE_U32X4);
        }

        // NEON has a dedicated 32-bit byte-reverse.
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vrev32q_u8(t(a))) };
        }

        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = a;
            unreachable!()
        }
    }
679
    /// Reverses the byte order within each `u64` lane.
    ///
    /// T1: SSE41, NEON, WASM128
    #[inline(always)]
    fn u64x2_bswap(self, a: V128) -> V128 {
        // SSE41/WASM: byte shuffle with a precomputed swap pattern.
        if matches_isa!(Self, SSE41 | WASM128) {
            return self.u8x16_swizzle(a, crate::bswap::SHUFFLE_U64X2);
        }

        // NEON has a dedicated 64-bit byte-reverse.
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vrev64q_u8(t(a))) };
        }

        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = a;
            unreachable!()
        }
    }
697
    /// Returns `true` iff at least one of the 16 byte lanes is zero.
    ///
    /// T1: NEON-A64, WASM128
    ///
    /// T2: SSE2, NEON-A32
    #[inline(always)]
    fn u8x16_any_zero(self, a: V128) -> bool {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            // Compare against zero, then check if any lane's mask bit is set.
            let is_zero = self.u8x16_eq(a, self.v128_create_zero());
            return self.u8x16_bitmask(is_zero) != 0;
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            // A32: pairwise-min folds 16 -> 8 bytes; `vtst` yields all-ones
            // per nonzero byte, so the folded mask is u64::MAX only when
            // every byte was nonzero.
            return unsafe {
                let a: uint8x8x2_t = t(a);
                let a = vpmin_u8(a.0, a.1);
                let m: u64 = t(vtst_u8(a, a));
                m != u64::MAX
            };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            // Across-vector min is zero iff some byte is zero.
            return unsafe { vminvq_u8(t(a)) == 0 };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { !u8x16_all_true(t(a)) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = a;
            unreachable!()
        }
    }
730
    /// Collects the most significant bit of each byte lane into a 16-bit mask
    /// (lane 0 -> bit 0).
    ///
    /// NOTE: not implemented for NEON; callers must gate on SSE2/WASM128.
    ///
    /// T1: SSE2, WASM128
    #[inline(always)]
    fn u8x16_bitmask(self, a: V128) -> u16 {
        // Miri cannot execute vendor intrinsics; use the scalar simulation.
        if cfg!(miri) {
            return crate::simulation::u8x16_bitmask(a);
        }
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            // movemask returns an i32 with only the low 16 bits populated.
            return unsafe { _mm_movemask_epi8(t(a)) as u16 };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            unimplemented!()
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { u8x16_bitmask(t(a)) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = a;
            unreachable!()
        }
    }
754
    /// Horizontal maximum over the 16 byte lanes.
    ///
    /// NOTE: only NEON-A64 provides an implementation; other ISAs panic.
    ///
    /// T1: NEON-A64
    #[inline(always)]
    fn u8x16_reduce_max(self, a: V128) -> u8 {
        if matches_isa!(Self, SSE41 | WASM128) {
            unimplemented!()
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            unimplemented!()
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { vmaxvq_u8(t(a)) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = a;
            unreachable!()
        }
    }
774
    /// Horizontal minimum over the 16 byte lanes.
    ///
    /// NOTE: only NEON-A64 provides an implementation; other ISAs panic.
    ///
    /// T1: NEON-A64
    #[inline(always)]
    fn u8x16_reduce_min(self, a: V128) -> u8 {
        if matches_isa!(Self, SSE41 | WASM128) {
            unimplemented!()
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            unimplemented!()
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { vminvq_u8(t(a)) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = a;
            unreachable!()
        }
    }
794
    /// Bit select: for each bit, takes the bit from `b` where `a` is set,
    /// otherwise the bit from `c`.
    ///
    /// T1: NEON
    ///
    /// T2: SSE2, WASM128
    #[inline(always)]
    fn v128_bsl(self, a: V128, b: V128, c: V128) -> V128 {
        // Emulation: ((b ^ c) & a) ^ c == (a & b) | (!a & c).
        if matches_isa!(Self, SSE2 | WASM128) {
            return self.v128_xor(self.v128_and(self.v128_xor(b, c), a), c);
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vbslq_u8(t(a), t(b), t(c))) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = (a, b, c);
            unreachable!()
        }
    }
812
    /// Interleaves the low 8 bytes of `a` and `b`:
    /// `[a0, b0, a1, b1, ..., a7, b7]`.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u8x16_zip_lo(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_unpacklo_epi8(t(a), t(b))) };
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            // A32 `vzipq` returns both halves; keep the low one.
            return unsafe { t(vzipq_u8(t(a), t(b)).0) };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vzip1q_u8(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u8x16_shuffle::<0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23>(a, b);
            return unsafe { t(ans) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
839
    /// Interleaves the high 8 bytes of `a` and `b`:
    /// `[a8, b8, a9, b9, ..., a15, b15]`.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u8x16_zip_hi(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_unpackhi_epi8(t(a), t(b))) };
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            // A32 `vzipq` returns both halves; keep the high one.
            return unsafe { t(vzipq_u8(t(a), t(b)).1) };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vzip2q_u8(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u8x16_shuffle::<8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>(a, b);
            return unsafe { t(ans) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
866
    /// Interleaves the low 4 `u16` lanes of `a` and `b`:
    /// `[a0, b0, a1, b1, a2, b2, a3, b3]`.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u16x8_zip_lo(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_unpacklo_epi16(t(a), t(b))) };
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            // A32 `vzipq` returns both halves; keep the low one.
            return unsafe { t(vzipq_u16(t(a), t(b)).0) };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vzip1q_u16(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u16x8_shuffle::<0, 8, 1, 9, 2, 10, 3, 11>(a, b);
            return unsafe { t(ans) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
893
    /// Interleaves the high 4 `u16` lanes of `a` and `b`:
    /// `[a4, b4, a5, b5, a6, b6, a7, b7]`.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u16x8_zip_hi(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_unpackhi_epi16(t(a), t(b))) };
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            // A32 `vzipq` returns both halves; keep the high one.
            return unsafe { t(vzipq_u16(t(a), t(b)).1) };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vzip2q_u16(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u16x8_shuffle::<4, 12, 5, 13, 6, 14, 7, 15>(a, b);
            return unsafe { t(ans) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
920
    /// Interleaves the low 2 `u32` lanes of `a` and `b`: `[a0, b0, a1, b1]`.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u32x4_zip_lo(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_unpacklo_epi32(t(a), t(b))) };
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            // A32 `vzipq` returns both halves; keep the low one.
            return unsafe { t(vzipq_u32(t(a), t(b)).0) };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vzip1q_u32(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u32x4_shuffle::<0, 4, 1, 5>(a, b);
            return unsafe { t(ans) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
947
    /// Interleaves the high 2 `u32` lanes of `a` and `b`: `[a2, b2, a3, b3]`.
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u32x4_zip_hi(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_unpackhi_epi32(t(a), t(b))) };
        }
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            // A32 `vzipq` returns both halves; keep the high one.
            return unsafe { t(vzipq_u32(t(a), t(b)).1) };
        }
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vzip2q_u32(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u32x4_shuffle::<2, 6, 3, 7>(a, b);
            return unsafe { t(ans) };
        }
        // No ISA branch matched: statically unreachable on supported targets.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
974
    /// Combines the low `u64` lanes of `a` and `b`, producing `[a0, b0]`
    /// (explicit in the NEON array construction and the wasm shuffle indices).
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u64x2_zip_lo(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_unpacklo_epi64(t(a), t(b))) };
        }
        // NEON: no single 64-bit zip intrinsic is used here; build the pair
        // directly from the two low lanes via array transmutes.
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe {
                let (a, b): ([u64; 2], [u64; 2]) = (t(a), t(b));
                t([a[0], b[0]])
            };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u64x2_shuffle::<0, 2>(a, b);
            return unsafe { t(ans) };
        }
        // Every supported ISA is handled above; reaching here is a dispatch bug.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
1000
    /// Combines the high `u64` lanes of `a` and `b`, producing `[a1, b1]`
    /// (explicit in the NEON array construction and the wasm shuffle indices).
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u64x2_zip_hi(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_unpackhi_epi64(t(a), t(b))) };
        }
        // NEON: build the pair directly from the two high lanes via array transmutes.
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe {
                let (a, b): ([u64; 2], [u64; 2]) = (t(a), t(b));
                t([a[1], b[1]])
            };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u64x2_shuffle::<1, 3>(a, b);
            return unsafe { t(ans) };
        }
        // Every supported ISA is handled above; reaching here is a dispatch bug.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
1026
    /// Selects the even-indexed bytes (0, 2, .., 30) of the 32-byte
    /// concatenation of `a` then `b` (lane order shown by the wasm shuffle).
    ///
    /// T1: NEON, WASM128
    #[inline(always)]
    fn u8x16_unzip_even(self, a: V128, b: V128) -> V128 {
        // SSE2 matches but no x86 implementation is provided for this op.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            unimplemented!()
        }
        // arm (unstable): vuzpq_u8 yields both de-interleaved halves; `.0` is the even one.
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vuzpq_u8(t(a), t(b)).0) };
        }
        // aarch64 has a dedicated "uzp1" (even lanes) instruction.
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vuzp1q_u8(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u8x16_shuffle::<0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30>(a, b);
            return unsafe { t(ans) };
        }
        // Every supported ISA is handled above; reaching here is a dispatch bug.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
1053
    /// Selects the odd-indexed bytes (1, 3, .., 31) of the 32-byte
    /// concatenation of `a` then `b` (lane order shown by the wasm shuffle).
    ///
    /// T1: NEON, WASM128
    #[inline(always)]
    fn u8x16_unzip_odd(self, a: V128, b: V128) -> V128 {
        // SSE2 matches but no x86 implementation is provided for this op.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            unimplemented!()
        }
        // arm (unstable): vuzpq_u8 yields both de-interleaved halves; `.1` is the odd one.
        #[cfg(all(feature = "unstable", target_arch = "arm"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vuzpq_u8(t(a), t(b)).1) };
        }
        // aarch64 has a dedicated "uzp2" (odd lanes) instruction.
        #[cfg(target_arch = "aarch64")]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vuzp2q_u8(t(a), t(b))) };
        }
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            let (a, b) = unsafe { (t(a), t(b)) };
            let ans = u8x16_shuffle::<1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>(a, b);
            return unsafe { t(ans) };
        }
        // Every supported ISA is handled above; reaching here is a dispatch bug.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
1080
    /// Per-lane unsigned 16-bit multiply, keeping the high 16 bits of each
    /// 32-bit product (per `_mm_mulhi_epu16`). x86-only.
    ///
    /// T1: SSE2
    #[inline(always)]
    fn u16x8_mul_hi(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_mulhi_epu16(t(a), t(b))) };
        }
        // ISA matched but this op is not provided on NEON/WASM128.
        if matches_isa!(Self, NEON | WASM128) {
            unimplemented!()
        }
        // No supported ISA matched at all.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
1096
    /// Per-lane signed 16-bit multiply, keeping the high 16 bits of each
    /// 32-bit product (per `_mm_mulhi_epi16`). x86-only.
    ///
    /// T1: SSE2
    #[inline(always)]
    fn i16x8_mul_hi(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_mulhi_epi16(t(a), t(b))) };
        }
        // ISA matched but this op is not provided on NEON/WASM128.
        if matches_isa!(Self, NEON | WASM128) {
            unimplemented!()
        }
        // No supported ISA matched at all.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
1112
    /// Per `_mm_maddubs_epi16`: multiplies unsigned bytes of `a` by the
    /// corresponding signed bytes of `b`, then horizontally adds adjacent
    /// pairs of products into saturated `i16` lanes. x86-only (SSSE3).
    ///
    /// T1: SSSE3
    #[inline(always)]
    fn i16x8_maddubs(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSSE3) {
            return unsafe { t(_mm_maddubs_epi16(t(a), t(b))) };
        }
        // ISA matched but this op is not provided on NEON/WASM128.
        if matches_isa!(Self, NEON | WASM128) {
            unimplemented!()
        }
        // No supported ISA matched at all.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
1128
    /// Per-lane 16-bit blend (per `_mm_blend_epi16`): bit `i` of the
    /// compile-time mask `IMM8` selects lane `i` from `b` when set,
    /// from `a` when clear. x86-only (SSE4.1).
    ///
    /// T1: SSE41
    #[inline(always)]
    fn u16x8_blend<const IMM8: i32>(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE41) {
            return unsafe { t(_mm_blend_epi16::<IMM8>(t(a), t(b))) };
        }
        // ISA matched but this op is not provided on NEON/WASM128.
        if matches_isa!(Self, NEON | WASM128) {
            unimplemented!()
        }
        // No supported ISA matched at all.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
1144
    /// Per-byte variable blend: for each lane, `if highbit(c) { b } else { a }`
    /// (per `_mm_blendv_epi8`, which selects by the sign bit of the mask byte).
    /// x86-only (SSE4.1).
    ///
    /// T1: SSE41
    #[inline(always)]
    fn u8x16_blendv(self, a: V128, b: V128, c: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE41) {
            return unsafe { t(_mm_blendv_epi8(t(a), t(b), t(c))) };
        }
        // ISA matched but this op is not provided on NEON/WASM128.
        if matches_isa!(Self, NEON | WASM128) {
            unimplemented!()
        }
        // No supported ISA matched at all.
        {
            let _ = (a, b, c);
            unreachable!()
        }
    }
1162
    /// Per `_mm_madd_epi16`: multiplies corresponding signed 16-bit lanes and
    /// horizontally adds adjacent pairs of 32-bit products into `i32` lanes.
    /// x86-only.
    ///
    /// T1: SSE2
    #[inline(always)]
    fn i16x8_madd(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_madd_epi16(t(a), t(b))) };
        }
        // ISA matched but this op is not provided on NEON/WASM128.
        if matches_isa!(Self, NEON | WASM128) {
            unimplemented!()
        }
        // No supported ISA matched at all.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
1178
    /// Per-byte rounding average of `a` and `b` (all three intrinsics below
    /// compute `(a + b + 1) >> 1` per their respective ISA references).
    ///
    /// T1: SSE2, NEON, WASM128
    #[inline(always)]
    fn u8x16_avgr(self, a: V128, b: V128) -> V128 {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if matches_isa!(Self, SSE2) {
            return unsafe { t(_mm_avg_epu8(t(a), t(b))) };
        }
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if matches_isa!(Self, NEON) {
            return unsafe { t(vrhaddq_u8(t(a), t(b))) };
        }
        // NOTE: this calls the free function `core::arch::wasm32::u8x16_avgr`,
        // not this trait method — there is no `self.` receiver here.
        #[cfg(target_arch = "wasm32")]
        if matches_isa!(Self, WASM128) {
            return unsafe { t(u8x16_avgr(t(a), t(b))) };
        }
        // Every supported ISA is handled above; reaching here is a dispatch bug.
        {
            let _ = (a, b);
            unreachable!()
        }
    }
1199
1200    /// T1: SSE2, NEON, WASM128
1201    #[inline(always)]
1202    fn i8x16_add_sat(self, a: V128, b: V128) -> V128 {
1203        unified::add_sat::<_, i8, _>(self, a, b)
1204    }
1205
1206    /// T1: SSE2, NEON, WASM128
1207    #[inline(always)]
1208    fn u8x16_add_sat(self, a: V128, b: V128) -> V128 {
1209        unified::add_sat::<_, u8, _>(self, a, b)
1210    }
1211
1212    /// T1: SSE2
1213    #[inline(always)]
1214    fn i16x8_packus(self, a: V128, b: V128) -> V128 {
1215        if cfg!(miri) {
1216            return crate::simulation::i16x8_packus(a, b);
1217        }
1218        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1219        if matches_isa!(Self, SSE2) {
1220            return unsafe { t(_mm_packus_epi16(t(a), t(b))) };
1221        }
1222        {
1223            let _ = (a, b);
1224            unreachable!()
1225        }
1226    }
1227}