/// Re-emits the given items unchanged.
///
/// Accepts zero or more complete items and expands to exactly those items.
/// Wrapping several items in one invocation lets a single outer attribute
/// (for example `#[cfg(...)]`) placed on the invocation gate all of them at
/// once.
#[macro_export]
macro_rules! item_group {
    ($($item:item)*) => {
        $($item)*
    };
}
7
/// Debug-asserts that a raw pointer's address is a multiple of `$align`.
///
/// * `$ptr`   - an expression usable as a `*const T` for some `T`.
/// * `$align` - an integer literal, interpreted as a `usize` number of bytes.
///
/// The check is performed with `debug_assert!`, so it is only evaluated when
/// debug assertions are enabled. `$align` must be non-zero, otherwise the
/// remainder operation itself panics.
macro_rules! debug_assert_ptr_align {
    ($ptr:expr, $align:literal) => {{
        let align: usize = $align;
        // Erase the pointee type so that only the address is inspected.
        let ptr = <*const _>::cast::<()>($ptr);
        let addr = ptr as usize;
        debug_assert!(addr % align == 0)
    }};
}
16
/// Expands to a string literal holding Markdown documentation sections.
///
/// The text covers the recommended profile settings, CPU feature detection
/// (the `detect` and `unstable` feature flags) and `no_std` support.
/// Presumably it is spliced into crate-level docs via `#[doc = ...]`;
/// confirm against the call sites.
#[macro_export]
macro_rules! shared_docs {
    () => {
        r#"
# Profile settings

To ensure maximum performance, the following [profile settings](https://doc.rust-lang.org/cargo/reference/profiles.html#profile-settings) are recommended when compiling this crate:

```toml
opt-level = 3
lto = "fat"
codegen-units = 1
```

# CPU feature detection

The feature flag `detect` is enabled by default.

When the feature flag `detect` is enabled, the APIs will *test at runtime* whether *the CPU (and OS)* supports the required instruction set. The runtime detection will be skipped if the fastest implementation is already available at compile-time.

When the feature flag `detect` is disabled, the APIs will *test at compile-time* whether *the compiler flags* supports the required instruction set.

If the environment supports SIMD acceleration, the APIs will call SIMD functions under the hood. Otherwise, the APIs will call fallback functions.

When the feature flag `unstable` is enabled, this crate requires the nightly toolchain to compile.

# `no_std` support

You can disable the default features to use this crate in a `no_std` environment.

You can enable the feature flag `alloc` if the environment supports heap allocation.

Currently the feature flag `detect` depends on the standard library. Dynamic CPU feature detection is not available in `no_std` environments.
"#
    };
}
53
/// Generates a dispatch module for one SIMD-accelerated routine.
///
/// # Invocation
///
/// ```ignore
/// dispatch!(
///     name      = {module_name},
///     signature = {pub unsafe fn(arg: Type, ...) -> Ret},
///     fallback  = {path::to::fallback_fn},
///     simd      = {path::to::generic_simd_fn},
///     targets   = {"avx2", "sse4.1", ...},   // one or more
///     fastest   = {"avx2", ...},             // zero or more
/// );
/// ```
///
/// Recognised target strings are `"avx2"`, `"sse4.1"`, `"ssse3"`, `"sse2"`,
/// `"neon"` and `"simd128"`.
///
/// # Generated items (inside `$vis mod $name`)
///
/// * `simd<S: SIMD256>(s, args...)` - forwards to `$simd_fn` with a caller
///   supplied instruction-set token.
/// * One `#[target_feature]` function per listed target (`avx2`, `sse41`,
///   `ssse3`, `sse2`, `neon`, `simd128`), each forwarding to `$simd_fn` with
///   the matching `$crate::isa` token. Each is only compiled on the matching
///   architecture.
/// * `auto(args...)`:
///   * Without the `detect` feature, or on `wasm32`: tries every entry of
///     `targets` in order using compile-time `cfg(target_feature)` checks and
///     otherwise calls `$fallback_fn`.
///   * With the `detect` feature on non-`wasm32` targets: first tries every
///     entry of `fastest` with the same compile-time checks; otherwise loads a
///     function pointer from a static `AtomicPtr` and calls it. The pointer
///     initially refers to `init_ifunc`, which performs the runtime
///     `is_enabled()` checks over `targets` (in order), stores the selected
///     function back into the static and calls it.
///
/// An empty `fastest = {}` list is accepted; in that case the detecting
/// `auto` always goes through the stored function pointer.
///
/// # Safety
///
/// Every generated function is `unsafe`; callers must uphold whatever
/// contract `$simd_fn` / `$fallback_fn` require, and the per-target functions
/// must only be called when the corresponding CPU feature is available.
#[macro_export]
macro_rules! dispatch {
    // ---------------------------------------------------------------------
    // Public entry point.
    // ---------------------------------------------------------------------
    (
        name = {$name:ident},
        signature = {$vis:vis unsafe fn($($arg_name: ident: $arg_type: ty),*) -> $ret:ty},
        fallback = {$fallback_fn:path},
        simd = {$simd_fn:path},
        targets = {$($target:tt),+},
        fastest = {$($fastest:tt),*},
    ) => {
        $vis mod $name {
            #![allow(
                clippy::missing_safety_doc,
                clippy::must_use_candidate,
            )]

            use super::*;

            use $crate::SIMD256;

            // Generic entry: the caller picks the instruction-set token.
            #[allow(dead_code)]
            #[inline]
            $vis unsafe fn simd<S: SIMD256>(s: S $(,$arg_name: $arg_type)*) -> $ret {
                $simd_fn(s, $($arg_name),*)
            }

            // One `#[target_feature]` wrapper per requested target.
            $crate::dispatch!(
                @iter_compile,
                signature = {$vis unsafe fn($($arg_name: $arg_type),*) -> $ret},
                simd = {$simd_fn},
                targets = {$($target),+},
            );

            // Static dispatch only: used when runtime detection is disabled
            // or the target is wasm32.
            #[allow(unreachable_code)]
            #[cfg(not(all(feature = "detect", not(target_arch = "wasm32"))))]
            #[inline]
            $vis unsafe fn auto($($arg_name: $arg_type),*) -> $ret {
                $crate::dispatch!(
                    @iter_resolve_static,
                    targets = {$($target),+},
                    args = {$($arg_name),*},
                );
                $fallback_fn($($arg_name),*)
            }

            // Runtime dispatch through a lazily resolved function pointer.
            #[cfg(all(feature = "detect", not(target_arch = "wasm32")))]
            $crate::item_group! {
                use core::sync::atomic::{AtomicPtr, Ordering::Relaxed};

                // Starts out pointing at `init_ifunc`; replaced by the
                // resolved implementation on the first call.
                static IFUNC: AtomicPtr<()> = AtomicPtr::new(init_ifunc as *mut ());

                // Picks the first target whose runtime check succeeds,
                // otherwise the fallback.
                #[inline(always)]
                fn resolve() -> unsafe fn($($arg_type),*) -> $ret {
                    use $crate::isa::InstructionSet;
                    $crate::dispatch!(@iter_resolve_dynamic, targets = {$($target),+},);
                    $fallback_fn
                }

                // First-call trampoline: resolve, cache, then forward.
                #[inline]
                unsafe fn init_ifunc($($arg_name: $arg_type),*) -> $ret {
                    let f = resolve();
                    IFUNC.store(f as *mut (), Relaxed);
                    f($($arg_name),*)
                }

                #[allow(unreachable_code)]
                #[inline]
                $vis unsafe fn auto($($arg_name: $arg_type),*) -> $ret {
                    // Skip the indirect call when one of the fastest targets
                    // is already enabled at compile time. `fastest` may be
                    // empty, hence `*` (matching the matcher) rather than `+`.
                    $crate::dispatch!(
                        @iter_resolve_static,
                        targets = {$($fastest),*},
                        args = {$($arg_name),*},
                    );

                    // IFUNC only ever holds `init_ifunc` or a value returned
                    // by `resolve`, both of which have this exact signature.
                    let f: unsafe fn($($arg_type),*) -> $ret = core::mem::transmute(IFUNC.load(Relaxed));
                    f($($arg_name),*)
                }
            }
        }
    };

    // ---------------------------------------------------------------------
    // @iter_resolve_static: emit a compile-time check for each target.
    // ---------------------------------------------------------------------
    (
        @iter_resolve_static,
        targets = {$x:tt, $($xs:tt),+},
        args = {$($arg_name: ident),*},
    ) => {
        $crate::dispatch!(@resolve_static, $x, $($arg_name),*);
        $crate::dispatch!(@iter_resolve_static, targets = {$($xs),+}, args = {$($arg_name),*},);
    };

    (
        @iter_resolve_static,
        targets = {$x:tt},
        args = {$($arg_name: ident),*},
    ) => {
        $crate::dispatch!(@resolve_static, $x, $($arg_name),*);
    };

    // Empty target list (e.g. `fastest = {}`): nothing to check.
    (
        @iter_resolve_static,
        targets = {},
        args = {$($arg_name: ident),*},
    ) => {};

    // ---------------------------------------------------------------------
    // @resolve_static: early-return through a target wrapper when the
    // feature is enabled at compile time.
    // ---------------------------------------------------------------------
    (@resolve_static, "avx2", $($arg_name: ident),*) => {
        #[cfg(all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "avx2"
        ))]
        {
            return unsafe { avx2($($arg_name),*) }
        }
    };

    (@resolve_static, "sse4.1", $($arg_name: ident),*) => {
        #[cfg(all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ))]
        {
            return unsafe { sse41($($arg_name),*) }
        }
    };

    (@resolve_static, "ssse3", $($arg_name: ident),*) => {
        #[cfg(all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "ssse3"
        ))]
        {
            return unsafe { ssse3($($arg_name),*) }
        }
    };

    (@resolve_static, "sse2", $($arg_name: ident),*) => {
        #[cfg(all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse2"
        ))]
        {
            return unsafe { sse2($($arg_name),*) }
        }
    };

    (@resolve_static, "neon", $($arg_name: ident),*) => {
        #[cfg(any(
            all(feature = "unstable", target_arch = "arm", target_feature = "neon"),
            all(target_arch = "aarch64", target_feature = "neon"),
        ))]
        {
            return unsafe { neon($($arg_name),*) }
        }
    };

    (@resolve_static, "simd128", $($arg_name: ident),*) => {
        #[cfg(all(
            target_arch = "wasm32",
            target_feature = "simd128",
        ))]
        {
            return unsafe { simd128($($arg_name),*) }
        }
    };

    // ---------------------------------------------------------------------
    // @iter_resolve_dynamic: emit a runtime check for each target.
    // ---------------------------------------------------------------------
    (
        @iter_resolve_dynamic,
        targets = {$x:tt, $($xs:tt),+},
    ) => {
        $crate::dispatch!(@resolve_dynamic, $x);
        $crate::dispatch!(@iter_resolve_dynamic, targets = {$($xs),+},);
    };

    (
        @iter_resolve_dynamic,
        targets = {$x:tt},
    ) => {
        $crate::dispatch!(@resolve_dynamic, $x);
    };

    // ---------------------------------------------------------------------
    // @resolve_dynamic: return the target wrapper when the instruction set
    // reports itself enabled at runtime.
    // ---------------------------------------------------------------------
    (@resolve_dynamic, "avx2") => {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if $crate::isa::AVX2::is_enabled() {
            return avx2;
        }
    };

    (@resolve_dynamic, "sse4.1") => {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if $crate::isa::SSE41::is_enabled() {
            return sse41;
        }
    };

    (@resolve_dynamic, "ssse3") => {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if $crate::isa::SSSE3::is_enabled() {
            return ssse3;
        }
    };

    (@resolve_dynamic, "sse2") => {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if $crate::isa::SSE2::is_enabled() {
            return sse2;
        }
    };

    (@resolve_dynamic, "neon") => {
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        if $crate::isa::NEON::is_enabled() {
            return neon;
        }
    };

    (@resolve_dynamic, "simd128") => {
        #[cfg(target_arch = "wasm32")]
        if $crate::isa::WASM128::is_enabled() {
            return simd128;
        }
    };

    // ---------------------------------------------------------------------
    // @iter_compile: emit one target wrapper per entry of `targets`.
    // ---------------------------------------------------------------------
    (
        @iter_compile,
        signature = {$vis:vis unsafe fn($($arg_name: ident: $arg_type: ty),*) -> $ret:ty},
        simd = {$simd_fn:path},
        targets = {$x:tt, $($xs:tt),+},
    ) => {
        $crate::dispatch!(
            @compile,
            signature = {$vis unsafe fn($($arg_name: $arg_type),*) -> $ret},
            simd = {$simd_fn},
            target = {$x},
        );

        $crate::dispatch!(
            @iter_compile,
            signature = {$vis unsafe fn($($arg_name: $arg_type),*) -> $ret},
            simd = {$simd_fn},
            targets = {$($xs),+},
        );
    };

    (
        @iter_compile,
        signature = {$vis:vis unsafe fn($($arg_name: ident: $arg_type: ty),*) -> $ret:ty},
        simd = {$simd_fn:path},
        targets = {$x:tt},
    ) => {
        $crate::dispatch!(
            @compile,
            signature = {$vis unsafe fn($($arg_name: $arg_type),*) -> $ret},
            simd = {$simd_fn},
            target = {$x},
        );
    };

    // ---------------------------------------------------------------------
    // @compile: the `#[target_feature]` wrapper for a single target.
    // ---------------------------------------------------------------------
    (
        @compile,
        signature = {$vis:vis unsafe fn($($arg_name: ident: $arg_type: ty),*) -> $ret:ty},
        simd = {$simd_fn:path},
        target = {"avx2"},
    ) => {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        #[inline]
        #[target_feature(enable = "avx2")]
        $vis unsafe fn avx2($($arg_name: $arg_type),*) -> $ret {
            use $crate::isa::{AVX2, InstructionSet as _};
            $simd_fn(AVX2::new() $(,$arg_name)*)
        }
    };

    (
        @compile,
        signature = {$vis:vis unsafe fn($($arg_name: ident: $arg_type: ty),*) -> $ret:ty},
        simd = {$simd_fn:path},
        target = {"sse4.1"},
    ) => {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        #[inline]
        #[target_feature(enable = "sse4.1")]
        $vis unsafe fn sse41($($arg_name: $arg_type),*) -> $ret {
            use $crate::isa::{SSE41, InstructionSet as _};
            $simd_fn(SSE41::new() $(,$arg_name)*)
        }
    };

    (
        @compile,
        signature = {$vis:vis unsafe fn($($arg_name: ident: $arg_type: ty),*) -> $ret:ty},
        simd = {$simd_fn:path},
        target = {"ssse3"},
    ) => {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        #[inline]
        #[target_feature(enable = "ssse3")]
        $vis unsafe fn ssse3($($arg_name: $arg_type),*) -> $ret {
            use $crate::isa::{SSSE3, InstructionSet as _};
            $simd_fn(SSSE3::new() $(,$arg_name)*)
        }
    };

    (
        @compile,
        signature = {$vis:vis unsafe fn($($arg_name: ident: $arg_type: ty),*) -> $ret:ty},
        simd = {$simd_fn:path},
        target = {"sse2"},
    ) => {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        #[inline]
        #[target_feature(enable = "sse2")]
        $vis unsafe fn sse2($($arg_name: $arg_type),*) -> $ret {
            use $crate::isa::{SSE2, InstructionSet as _};
            $simd_fn(SSE2::new() $(,$arg_name)*)
        }
    };

    (
        @compile,
        signature = {$vis:vis unsafe fn($($arg_name: ident: $arg_type: ty),*) -> $ret:ty},
        simd = {$simd_fn:path},
        target = {"neon"},
    ) => {
        #[cfg(any(all(feature = "unstable", target_arch = "arm"), target_arch = "aarch64"))]
        #[inline]
        #[target_feature(enable = "neon")]
        $vis unsafe fn neon($($arg_name: $arg_type),*) -> $ret {
            use $crate::isa::{NEON, InstructionSet as _};
            $simd_fn(NEON::new() $(,$arg_name)*)
        }
    };

    (
        @compile,
        signature = {$vis:vis unsafe fn($($arg_name: ident: $arg_type: ty),*) -> $ret:ty},
        simd = {$simd_fn:path},
        target = {"simd128"},
    ) => {
        #[cfg(target_arch = "wasm32")]
        #[cfg_attr(not(vsimd_dump_symbols), inline)]
        #[target_feature(enable = "simd128")]
        $vis unsafe fn simd128($($arg_name: $arg_type),*) -> $ret {
            use $crate::isa::{WASM128, InstructionSet as _};
            $simd_fn(WASM128::new() $(,$arg_name)*)
        }
    };
}