vsimd/
macros.rs

/// Re-emits the given items exactly as written.
///
/// Wrapping several items in one invocation lets a single attribute placed on
/// the invocation (for example `#[cfg(...)]`) apply to the whole group at once.
#[macro_export]
macro_rules! item_group {
    ($($entry:item)*) => ($($entry)*);
}
7
/// Debug-only check that the address of a raw pointer is a multiple of `$align`.
///
/// `$ptr` is handed to `<*const _>::cast::<()>`, so it must be (or coerce to) a
/// `*const T`. `$align` must be a literal usable as a `usize`. The check is a
/// `debug_assert!`, so it is only evaluated when debug assertions are enabled.
macro_rules! debug_assert_ptr_align {
    ($ptr:expr, $align:literal) => {{
        // Drop the pointee type, then read the pointer as a plain address.
        let addr = <*const _>::cast::<()>($ptr) as usize;
        let align: usize = $align;
        debug_assert!(addr % align == 0)
    }};
}
16
/// Expands to a raw string literal holding Markdown documentation with three
/// sections: recommended profile settings, CPU feature detection, and
/// `no_std` support.
///
/// The macro takes no arguments.
// NOTE(review): presumably meant to be spliced into `#[doc = ...]` attributes
// so several crates share the same text -- confirm against the call sites.
#[macro_export]
macro_rules! shared_docs {
    () => {
        r#"
# Profile settings

To ensure maximum performance, the following [profile settings](https://doc.rust-lang.org/cargo/reference/profiles.html#profile-settings) are recommended when compiling this crate:

```toml
opt-level = 3
lto = "fat"
codegen-units = 1
```

# CPU feature detection

The feature flag `detect` is enabled by default.

When the feature flag `detect` is enabled, the APIs will *test at runtime* whether *the CPU (and OS)* supports the required instruction set. The runtime detection will be skipped if the fastest implementation is already available at compile-time.

When the feature flag `detect` is disabled, the APIs will *test at compile-time* whether *the compiler flags* supports the required instruction set.

If the environment supports SIMD acceleration, the APIs will call SIMD functions under the hood. Otherwise, the APIs will call fallback functions.

When the feature flag `unstable` is enabled, this crate requires the nightly toolchain to compile.

# `no_std` support

You can disable the default features to use this crate in a `no_std` environment.

You can enable the feature flag `alloc` if the environment supports heap allocation.

Currently the feature flag `detect` depends on the standard library. Dynamic CPU feature detection is not available in `no_std` environments.
"#
    };
}
53
/// Generates a module named `$name` that wraps one SIMD routine with per-target
/// entry points and an `auto` function that chooses between them.
///
/// # Input
///
/// * `name`: identifier of the generated module.
/// * `signature`: `$vis unsafe fn(arg: Type, ...) -> Ret`. The visibility is applied
///   to the module and to the generated `simd`, `auto` and per-target functions.
/// * `fallback`: path called as `fallback(args...)` when no listed target is selected.
/// * `simd`: path called as `simd(token, args...)`, where `token` is either the
///   caller-supplied `S: SIMD256` value or `<Isa>::new()` for the target being compiled.
/// * `targets`: non-empty, comma-separated list of target strings, tried in the order
///   given. Recognized strings are `"avx2"`, `"sse4.1"`, `"ssse3"`, `"sse2"`, `"neon"`
///   and `"simd128"`.
/// * `fastest`: possibly empty list of target strings. In the runtime-detection
///   variant of `auto`, a listed target whose feature is enabled at compile time is
///   called directly, skipping the function-pointer lookup.
///
/// # Generated items
///
/// * `simd<S: SIMD256>(s, args...)`: forwards to `$simd_fn` with the given token.
/// * One `#[target_feature]` function per entry in `targets` (`avx2`, `sse41`,
///   `ssse3`, `sse2`, `neon`, `simd128`), each gated on the matching architecture.
/// * `auto(args...)`:
///   * When `feature = "detect"` is off, or on `wasm32`: tries every target whose
///     feature is enabled at compile time, in order, then calls the fallback.
///   * Otherwise: tries `fastest` at compile time, then calls through the function
///     pointer stored in the `IFUNC` atomic. `IFUNC` starts out pointing at
///     `init_ifunc`, which resolves the implementation with `is_enabled()` checks,
///     stores it, and calls it.
///
/// Arms whose first token starts with `@` are internal helpers of this macro.
#[macro_export]
macro_rules! dispatch {
    // Entry point: builds the whole `$name` module.
    (
        name        = {$name:ident},
        signature   = {$vis:vis unsafe fn($($arg_name: ident: $arg_type: ty),*) -> $ret:ty},
        fallback    = {$fallback_fn:path},
        simd        = {$simd_fn:path},
        targets     = {$($target:tt),+},
        fastest     = {$($fastest:tt),*},
    ) => {
        $vis mod $name {
            #![allow(
                clippy::missing_safety_doc,
                clippy::must_use_candidate,
            )]

            use super::*;

            use $crate::SIMD256;

            // Generic entry point: the caller supplies the instruction-set token.
            #[allow(dead_code)]
            #[inline]
            $vis unsafe fn simd<S: SIMD256>(s: S $(,$arg_name: $arg_type)*) -> $ret {
                $simd_fn(s, $($arg_name),*)
            }

            // One `#[target_feature]` function per listed target.
            $crate::dispatch!(
                @iter_compile,
                signature   = {$vis unsafe fn($($arg_name: $arg_type),*) -> $ret},
                simd        = {$simd_fn},
                targets     = {$($target),+},
            );

            // Compile-time-only selection: the first target whose feature is statically
            // enabled returns early; otherwise execution reaches the fallback call.
            #[allow(unreachable_code)]
            #[cfg(not(all(feature = "detect", not(target_arch = "wasm32"))))] // auto_direct
            #[inline]
            $vis unsafe fn auto($($arg_name: $arg_type),*) -> $ret {
                $crate::dispatch!(
                    @iter_resolve_static,
                    targets     = {$($target),+},
                    args        = {$($arg_name),*},
                );
                $fallback_fn($($arg_name),*)
            }

            // Runtime selection through a lazily resolved function pointer.
            #[cfg(all(feature = "detect", not(target_arch = "wasm32")))] // auto_indirect
            $crate::item_group! {
                use core::sync::atomic::{AtomicPtr, Ordering::Relaxed};

                // Starts as `init_ifunc`, which replaces it with the resolved function.
                static IFUNC: AtomicPtr<()> = AtomicPtr::new(init_ifunc as *mut ());

                // Picks the first target reported as enabled, else the fallback.
                #[inline(always)]
                fn resolve() -> unsafe fn($($arg_type),*) -> $ret {
                    use $crate::isa::InstructionSet;
                    $crate::dispatch!(@iter_resolve_dynamic, targets = {$($target),+},);
                    $fallback_fn
                }

                // Initial target of `IFUNC`: resolve, store the result, then forward
                // the current call to it.
                #[inline]
                unsafe fn init_ifunc($($arg_name: $arg_type),*) -> $ret {
                    let f = resolve();
                    IFUNC.store(f as *mut (), Relaxed);
                    f($($arg_name),*)
                }

                #[allow(unreachable_code)]
                #[inline]
                $vis unsafe fn auto($($arg_name: $arg_type),*) -> $ret {
                    // `fastest` may be empty, so it is repeated with `*`; the empty
                    // list is handled by the `targets = {}` arm and expands to nothing.
                    $crate::dispatch!(
                        @iter_resolve_static,
                        targets     = {$($fastest),*},
                        args        = {$($arg_name),*},
                    );

                    // SAFETY: the only values ever stored in `IFUNC` are `init_ifunc`
                    // and the result of `resolve()`, both cast from function pointers
                    // with exactly this signature.
                    let f: unsafe fn($($arg_type),*) -> $ret = core::mem::transmute(IFUNC.load(Relaxed));
                    f($($arg_name),*)
                }
            }
        }
    };

    // @iter_resolve_static: empty target list, nothing to try.
    (
        @iter_resolve_static,
        targets     = {},
        args        = {$($arg_name: ident),*},
    ) => {};

    // @iter_resolve_static: emit the check for the head, then recurse on the tail.
    (
        @iter_resolve_static,
        targets     = {$x:tt, $($xs:tt),+},
        args        = {$($arg_name: ident),*},
    ) => {
        $crate::dispatch!(@resolve_static, $x, $($arg_name),*);
        $crate::dispatch!(@iter_resolve_static, targets = {$($xs),+}, args = {$($arg_name),*},);
    };

    // @iter_resolve_static: last remaining target.
    (
        @iter_resolve_static,
        targets     = {$x:tt},
        args        = {$($arg_name: ident),*},
    ) => {
        $crate::dispatch!(@resolve_static, $x, $($arg_name),*);
    };

    // @resolve_static: each arm emits a block that exists only when the target
    // feature is enabled at compile time; the block returns from the enclosing
    // function with the result of the matching per-target function.
    (@resolve_static, "avx2", $($arg_name: ident),*) => {
        #[cfg(all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "avx2"
        ))]
        {
            return unsafe { avx2($($arg_name),*) }
        }
    };

    (@resolve_static, "sse4.1", $($arg_name: ident),*) => {
        #[cfg(all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ))]
        {
            return unsafe { sse41($($arg_name),*) }
        }
    };

    (@resolve_static, "ssse3", $($arg_name: ident),*) => {
        #[cfg(all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "ssse3"
        ))]
        {
            return unsafe { ssse3($($arg_name),*) }
        }
    };

    (@resolve_static, "sse2", $($arg_name: ident),*) => {
        #[cfg(all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse2"
        ))]
        {
            return unsafe { sse2($($arg_name),*) }
        }
    };

    (@resolve_static, "neon", $($arg_name: ident),*) => {
        #[cfg(any(
            all(feature = "unstable", target_arch = "arm", target_feature = "neon"),
            all(target_arch = "aarch64", target_feature = "neon"),
        ))]
        {
            return unsafe { neon($($arg_name),*) }
        }
    };

    (@resolve_static, "simd128", $($arg_name: ident),*) => {
        #[cfg(all(
            target_arch = "wasm32",
            target_feature = "simd128",
        ))]
        {
            return unsafe { simd128($($arg_name),*) }
        }
    };

    // @iter_resolve_dynamic: emit the runtime check for the head, then recurse.
    (
        @iter_resolve_dynamic,
        targets     = {$x:tt, $($xs:tt),+},
    ) => {
        $crate::dispatch!(@resolve_dynamic, $x);
        $crate::dispatch!(@iter_resolve_dynamic, targets = {$($xs),+},);
    };

    // @iter_resolve_dynamic: last remaining target.
    (
        @iter_resolve_dynamic,
        targets     = {$x:tt},
    ) => {
        $crate::dispatch!(@resolve_dynamic, $x);
    };

    // @resolve_dynamic: each arm emits an architecture-gated `if` that returns the
    // per-target function from the enclosing `resolve()` when `is_enabled()` is true.
    (@resolve_dynamic, "avx2") => {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if $crate::isa::AVX2::is_enabled() {
            return avx2;
        }
    };

    (@resolve_dynamic, "sse4.1") => {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if $crate::isa::SSE41::is_enabled() {
            return sse41;
        }
    };

    (@resolve_dynamic, "ssse3") => {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if $crate::isa::SSSE3::is_enabled() {
            return ssse3;
        }
    };

    (@resolve_dynamic, "sse2") => {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if $crate::isa::SSE2::is_enabled() {
            return sse2;
        }
    };

    (@resolve_dynamic, "neon") => {
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if $crate::isa::NEON::is_enabled() {
            return neon;
        }
    };

    (@resolve_dynamic, "simd128") => {
        #[cfg(target_arch = "wasm32")]
        if $crate::isa::WASM128::is_enabled() {
            return simd128;
        }
    };

    // @iter_compile: emit the function for the head target, then recurse on the tail.
    (
        @iter_compile,
        signature   = {$vis:vis unsafe fn($($arg_name: ident: $arg_type: ty),*) -> $ret:ty},
        simd        = {$simd_fn:path},
        targets     = {$x:tt, $($xs:tt),+},
    ) => {
        $crate::dispatch!(
            @compile,
            signature   = {$vis unsafe fn($($arg_name: $arg_type),*) -> $ret},
            simd        = {$simd_fn},
            target      = {$x},
        );

        $crate::dispatch!(
            @iter_compile,
            signature   = {$vis unsafe fn($($arg_name: $arg_type),*) -> $ret},
            simd        = {$simd_fn},
            targets     = {$($xs),+},
        );
    };

    // @iter_compile: last remaining target.
    (
        @iter_compile,
        signature   = {$vis:vis unsafe fn($($arg_name: ident: $arg_type: ty),*) -> $ret:ty},
        simd        = {$simd_fn:path},
        targets     = {$x:tt},
    ) => {
        $crate::dispatch!(
            @compile,
            signature   = {$vis unsafe fn($($arg_name: $arg_type),*) -> $ret},
            simd        = {$simd_fn},
            target      = {$x},
        );
    };

    // @compile: each arm emits one architecture-gated function carrying
    // `#[target_feature(enable = ...)]`; it creates the instruction-set token with
    // `new()` and forwards to `$simd_fn`.
    (
        @compile,
        signature   = {$vis:vis unsafe fn($($arg_name: ident: $arg_type: ty),*) -> $ret:ty},
        simd        = {$simd_fn:path},
        target      = {"avx2"},
    ) => {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        #[inline]
        #[target_feature(enable = "avx2")]
        $vis unsafe fn avx2($($arg_name:$arg_type),*) -> $ret {
            use $crate::isa::{AVX2, InstructionSet as _};
            $simd_fn(AVX2::new() $(,$arg_name)*)
        }
    };

    (
        @compile,
        signature   = {$vis:vis unsafe fn($($arg_name: ident: $arg_type: ty),*) -> $ret:ty},
        simd        = {$simd_fn:path},
        target      = {"sse4.1"},
    ) => {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        #[inline]
        #[target_feature(enable = "sse4.1")]
        $vis unsafe fn sse41($($arg_name:$arg_type),*) -> $ret {
            use $crate::isa::{SSE41, InstructionSet as _};
            $simd_fn(SSE41::new() $(,$arg_name)*)
        }
    };

    (
        @compile,
        signature   = {$vis:vis unsafe fn($($arg_name: ident: $arg_type: ty),*) -> $ret:ty},
        simd        = {$simd_fn:path},
        target      = {"ssse3"},
    ) => {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        #[inline]
        #[target_feature(enable = "ssse3")]
        $vis unsafe fn ssse3($($arg_name:$arg_type),*) -> $ret {
            use $crate::isa::{SSSE3, InstructionSet as _};
            $simd_fn(SSSE3::new() $(,$arg_name)*)
        }
    };

    (
        @compile,
        signature   = {$vis:vis unsafe fn($($arg_name: ident: $arg_type: ty),*) -> $ret:ty},
        simd        = {$simd_fn:path},
        target      = {"sse2"},
    ) => {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        #[inline]
        #[target_feature(enable = "sse2")]
        $vis unsafe fn sse2($($arg_name:$arg_type),*) -> $ret {
            use $crate::isa::{SSE2, InstructionSet as _};
            $simd_fn(SSE2::new() $(,$arg_name)*)
        }
    };

    (
        @compile,
        signature   = {$vis:vis unsafe fn($($arg_name: ident: $arg_type: ty),*) -> $ret:ty},
        simd        = {$simd_fn:path},
        target      = {"neon"},
    ) => {
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        #[inline]
        #[target_feature(enable = "neon")]
        $vis unsafe fn neon($($arg_name:$arg_type),*) -> $ret {
            use $crate::isa::{NEON, InstructionSet as _};
            $simd_fn(NEON::new() $(,$arg_name)*)
        }
    };

    (
        @compile,
        signature   = {$vis:vis unsafe fn($($arg_name: ident: $arg_type: ty),*) -> $ret:ty},
        simd        = {$simd_fn:path},
        target      = {"simd128"},
    ) => {
        #[cfg(target_arch = "wasm32")]
        #[cfg_attr(not(vsimd_dump_symbols), inline)]
        #[target_feature(enable = "simd128")]
        $vis unsafe fn simd128($($arg_name:$arg_type),*) -> $ret {
            use $crate::isa::{WASM128, InstructionSet as _};
            $simd_fn(WASM128::new() $(,$arg_name)*)
        }
    }
}