1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425
#![allow(clippy::needless_doctest_main)]
//! This crate provides the [`target`] and [`multiversion`] attributes for implementing
//! function multiversioning.
//!
//! Many CPU architectures have a variety of instruction set extensions that provide additional
//! functionality. Common examples are single instruction, multiple data (SIMD) extensions such as
//! SSE and AVX on x86/x86-64 and NEON on ARM/AArch64. When available, these extended features can
//! provide significant speed improvements to some functions. These optional features cannot be
//! haphazardly compiled into programs–executing an unsupported instruction will result in a
//! crash.
//!
//! **Function multiversioning** is the practice of compiling multiple versions of a function
//! with various features enabled and safely detecting which version to use at runtime.
//!
//! # Cargo features
//! There is one cargo feature, `std`, enabled by default. When enabled, [`multiversion`] will
//! use CPU feature detection at runtime to dispatch the appropriate function. Disabling this
//! feature will only allow compile-time function dispatch using `#[cfg(target_feature)]` and can
//! be used in `#![no_std]` crates.
//!
//! # Capabilities
//! The intention of this crate is to allow any function, other than trait methods, to be
//! multiversioned. If any functions do not work please file an issue on GitHub.
//!
//! The [`multiversion`] macro produces additional functions adjacent to the tagged function which
//! do not correspond to a trait member. If you would like to multiversion a trait method, instead
//! try multiversioning a free function or struct method and calling it from the trait method.
//!
//! # Target specification strings
//! Targets for the [`target`] and [`multiversion`] attributes are specified as a combination of
//! architecture (as named by the [`target_arch`] configuration option) and feature (as named by
//! the [`target_feature`] configuration option). A single architecture can be specified as:
//! * `"arch"`
//! * `"arch+feature"`
//! * `"arch+feature1+feature2"`
//!
//! while multiple architectures can be specified as:
//! * `"[arch1|arch2]"`
//! * `"[arch1|arch2]+feature"`
//! * `"[arch1|arch2]+feature1+feature2"`
//!
//! The following are all valid target specification strings:
//! * `"x86"` (matches the `"x86"` architecture)
//! * `"x86_64+avx+avx2"` (matches the `"x86_64"` architecture with the `"avx"` and `"avx2"`
//! features)
//! * `"[mips|mips64|powerpc|powerpc64]"` (matches any of the `"mips"`, `"mips64"`, `"powerpc"` or
//! `"powerpc64"` architectures)
//! * `"[arm|aarch64]+neon"` (matches either the `"arm"` or `"aarch64"` architectures with the
//! `"neon"` feature)
//!
//! # Example
//! The following example is a good candidate for optimization with SIMD. The function `square`
//! optionally uses the AVX instruction set extension on x86 or x86-64. The SSE instruction set
//! extension is part of x86-64, but is optional on x86 so the square function optionally detects
//! that as well. This is automatically implemented by the [`multiversion`] attribute.
//!
//! The following works by compiling multiple *clones* of the function with various features enabled
//! and detecting which to use at runtime. If none of the targets match the current CPU (e.g. an older
//! x86-64 CPU, or another architecture such as ARM), a clone without any features enabled is used.
//! ```
//! use multiversion::multiversion;
//!
//! #[multiversion]
//! #[clone(target = "[x86|x86_64]+avx")]
//! #[clone(target = "x86+sse")]
//! fn square(x: &mut [f32]) {
//! for v in x {
//! *v *= *v;
//! }
//! }
//! ```
//!
//! The following produces a nearly identical function, but instead of cloning the function, the
//! implementations are manually specified. This is typically more useful when the implementations
//! aren't identical, such as when using explicit SIMD instructions instead of relying on compiler
//! optimizations.
//! ```
//! use multiversion::{multiversion, target};
//!
//! #[target("[x86|x86_64]+avx")]
//! unsafe fn square_avx(x: &mut [f32]) {
//! for v in x {
//! *v *= *v;
//! }
//! }
//!
//! #[target("x86+sse")]
//! unsafe fn square_sse(x: &mut [f32]) {
//! for v in x {
//! *v *= *v;
//! }
//! }
//!
//! #[multiversion]
//! #[specialize(target = "[x86|x86_64]+avx", fn = "square_avx", unsafe = true)]
//! #[specialize(target = "x86+sse", fn = "square_sse", unsafe = true)]
//! fn square(x: &mut [f32]) {
//! for v in x {
//! *v *= *v;
//! }
//! }
//!
//! # fn main() {}
//! ```
//!
//! # Static dispatching
//! Sometimes it may be useful to call multiversioned functions from other multiversioned functions.
//! In these situations it would be inefficient to perform feature detection multiple times.
//! Additionally, the runtime detection prevents the function from being inlined. In this situation,
//! the `dispatch` helper macro allows bypassing feature detection:
//!
//! ```
//! # mod fix { // doctests do something weird with modules, this fixes it
//! use multiversion::multiversion;
//!
//! #[multiversion]
//! #[clone(target = "[x86|x86_64]+avx")]
//! #[clone(target = "x86+sse")]
//! fn square(x: &mut [f32]) {
//! for v in x {
//! *v *= *v
//! }
//! }
//!
//! #[multiversion]
//! #[clone(target = "[x86|x86_64]+avx")]
//! #[clone(target = "x86+sse")]
//! fn square_plus_one(x: &mut [f32]) {
//! dispatch!(square(x)); // this function call bypasses feature detection
//! for v in x {
//! *v += 1.0;
//! }
//! }
//!
//! # }
//! ```
//!
//! The `dispatch` macro supports either paths or function calls:
//! * `dispatch!(foo)`
//! * `dispatch!(Self::foo::<A, B>)`
//! * `dispatch!(foo(a, b))`
//! * `dispatch!(self.foo::<A, B>(a, b))`
//!
//! The statically dispatched function must be multiversioned over a subset of CPU features
//! supported by the caller function. For example, a function compiled for `x86_64+avx+avx2`
//! cannot statically dispatch a function compiled for `x86_64+avx`, but a function compiled
//! for `x86_64+avx` may statically dispatch a multiversioned function compiled for both
//! `[x86|x86_64]+avx` and `x86+sse` since an exact feature match exists for that architecture.
//!
//! # Conditional compilation
//! The `#[cfg]` attribute allows conditional compilation based on the target architecture and
//! features, however this does not take into account additional features specified by
//! `#[target_feature]`. In this scenario, the `#[target_cfg]` helper attribute provides
//! conditional compilation in functions tagged with [`multiversion`] or [`target`].
//!
//! The `#[target_cfg]` attribute supports `all`, `any`, and `not` (just like `#[cfg]`) and
//! supports the following keys:
//! * `target`: takes a target specification string as a value and is true if the target matches
//! the function's target
//!
//! ```
//! #[multiversion::multiversion]
//! #[clone(target = "[x86|x86_64]+avx")]
//! #[clone(target = "[arm|aarch64]+neon")]
//! fn print_arch() {
//! #[target_cfg(target = "[x86|x86_64]+avx")]
//! println!("avx");
//!
//! #[target_cfg(target = "[arm|aarch64]+neon")]
//! println!("neon");
//!
//! #[target_cfg(not(any(target = "[x86|x86_64]+avx", target = "[arm|aarch64]+neon")))]
//! println!("generic");
//! }
//! ```
//!
//! [`target`]: attr.target.html
//! [`multiversion`]: attr.multiversion.html
//! [`target_arch`]: https://doc.rust-lang.org/reference/conditional-compilation.html#target_arch
//! [`target_feature`]: https://doc.rust-lang.org/reference/conditional-compilation.html#target_feature
/// Provides function multiversioning.
///
/// Functions are selected in order, calling the first matching target. The function tagged by the
/// attribute is the generic implementation that does not require any specific architecture or
/// features.
///
/// # Helper attributes
/// * `#[clone]`
/// * Clones the function for the specified target.
/// * Arguments:
/// * `target`: the target specification of the clone
/// * `#[specialize]`
/// * Specializes the function for the specified target with another function.
/// * Arguments:
/// * `target`: the target specification of the specialization
/// * `fn`: path to the function specializing the tagged function
/// * `unsafe` (optional): indicates whether the specialization function is `unsafe`, but safe to
/// call for this target.
/// Functions tagged with the [`target`] attribute must be `unsafe`, so marking `unsafe = true`
/// indicates that the safety contract is fulfilled and `function` is safe to call on the specified
/// target. If `function` is unsafe for any other reason, remember to mark the tagged function
/// `unsafe` as well.
/// * `#[crate_path]`
/// * Specifies the location of the multiversion crate (useful for re-exporting).
/// * Arguments:
/// * `path`: the path to the multiversion crate
///
/// # Examples
/// ## Cloning
/// The following compiles `square` three times, once for each target and once for the generic
/// target. Calling `square` selects the appropriate version at runtime.
/// ```
/// use multiversion::multiversion;
///
/// #[multiversion]
/// #[clone(target = "[x86|x86_64]+avx")]
/// #[clone(target = "x86+sse")]
/// fn square(x: &mut [f32]) {
/// for v in x {
/// *v *= *v
/// }
/// }
/// ```
///
/// ## Specialization
/// This example creates a function `where_am_i` that prints the detected CPU feature.
/// ```
/// use multiversion::multiversion;
///
/// fn where_am_i_avx() {
/// println!("avx");
/// }
///
/// fn where_am_i_sse() {
/// println!("sse");
/// }
///
/// fn where_am_i_neon() {
/// println!("neon");
/// }
///
/// #[multiversion]
/// #[specialize(target = "[x86|x86_64]+avx", fn = "where_am_i_avx")]
/// #[specialize(target = "x86+sse", fn = "where_am_i_sse")]
/// #[specialize(target = "[arm|aarch64]+neon", fn = "where_am_i_neon")]
/// fn where_am_i() {
/// println!("generic");
/// }
///
/// # fn main() {}
/// ```
/// ## Making `target_feature` functions safe
/// This example is the same as the above example, but calls `unsafe` specialized functions. Note
/// that the `where_am_i` function is still safe, since we know we are only calling specialized
/// functions on supported CPUs.
/// ```
/// use multiversion::{multiversion, target};
///
/// #[target("[x86|x86_64]+avx")]
/// unsafe fn where_am_i_avx() {
/// println!("avx");
/// }
///
/// #[target("x86+sse")]
/// unsafe fn where_am_i_sse() {
/// println!("sse");
/// }
///
/// #[target("[arm|aarch64]+neon")]
/// unsafe fn where_am_i_neon() {
/// println!("neon");
/// }
///
/// #[multiversion]
/// #[specialize(target = "[x86|x86_64]+avx", fn = "where_am_i_avx", unsafe = true)]
/// #[specialize(target = "x86+sse", fn = "where_am_i_sse", unsafe = true)]
/// #[specialize(target = "[arm|aarch64]+neon", fn = "where_am_i_neon", unsafe = true)]
/// fn where_am_i() {
/// println!("generic");
/// }
///
/// # fn main() {}
/// ```
///
/// # Static dispatching
/// The [`multiversion`] attribute allows functions called inside the function to be statically dispatched.
/// Additionally, functions created with this attribute can themselves be statically dispatched.
/// See [static dispatching] for more information.
///
/// # Conditional compilation
/// The [`multiversion`] attribute supports conditional compilation with the `#[target_cfg]` helper
/// attribute. See [conditional compilation] for more information.
///
/// # Function name mangling
/// The functions created by this macro are mangled as `{ident}_{features}_version`, where `ident` is
/// the name of the multiversioned function, and `features` is either `default` (for the default
/// version with no features enabled) or the list of features, sorted alphabetically. Dots (`.`)
/// in the feature names are removed.
///
/// The following creates two functions, `foo_avx_sse41_version` and `foo_default_version`.
/// ```
/// #[multiversion::multiversion]
/// #[clone(target = "[x86|x86_64]+sse4.1+avx")]
/// fn foo() {}
///
/// #[multiversion::target("[x86|x86_64]+sse4.1+avx")]
/// unsafe fn call_foo_avx() {
/// foo_avx_sse41_version();
/// }
///
/// fn call_foo_default() {
/// foo_default_version();
/// }
/// ```
///
/// # Implementation details
/// The function version dispatcher consists of a function selector and an atomic function pointer.
/// Initially the function pointer will point to the function selector. On invocation, this selector
/// will then choose an implementation, store a pointer to it in the atomic function pointer for later
/// use and then pass on control to the chosen function. On subsequent calls, the chosen function
/// will be called without invoking the function selector.
///
/// Some comments on the benefits of this implementation:
/// * The function selector is only invoked once. Subsequent calls are reduced to an atomic load
/// and indirect function call (for non-generic, non-`async` functions). Generic and `async` functions
/// cannot be stored in the atomic function pointer, which may result in additional branches.
/// * If called in multiple threads, there is no contention. It is possible for two threads to hit
/// the same function before function selection has completed, which results in each thread
/// invoking the function selector, but the atomic ensures that these are synchronized correctly.
///
/// [`target`]: attr.target.html
/// [`multiversion`]: attr.multiversion.html
/// [static dispatching]: index.html#static-dispatching
/// [conditional compilation]: index.html#conditional-compilation
pub use multiversion_macros::multiversion;
/// Provides a less verbose equivalent to the `cfg(target_arch)` and `target_feature` attributes.
///
/// A function tagged with `#[target("[x86|x86_64]+avx+avx2")]`, for example, is equivalent to a
/// function tagged with each of:
/// * `#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]`
/// * `#[target_feature(enable = "avx")]`
/// * `#[target_feature(enable = "avx2")]`
///
/// The [`target`] attribute is intended to be used in tandem with the [`multiversion`] attribute
/// to produce hand-written multiversioned functions.
///
/// # Helper attributes
/// * `#[safe_inner]`
/// * Indicates that the inner contents of the function are safe and requires the use of `unsafe`
/// blocks to call `unsafe` functions.
///
/// # Static dispatching
/// The [`target`] attribute allows functions called inside the function to be statically dispatched.
/// See [static dispatching] for more information.
///
/// # Conditional compilation
/// The [`target`] attribute supports conditional compilation with the `#[target_cfg]` helper
/// attribute. See [conditional compilation] for more information.
///
/// [`target`]: attr.target.html
/// [`multiversion`]: attr.multiversion.html
/// [static dispatching]: index.html#static-dispatching
/// [conditional compilation]: index.html#conditional-compilation
pub use multiversion_macros::target;
/// Detects CPU features.
///
/// The macro takes one or more feature names separated by commas (a trailing comma is accepted)
/// and expands to a boolean expression. With several features, the individual results are
/// combined with `&&`, so the expression is `true` only if every listed feature is detected.
///
/// When the `std` feature is enabled, this macro operates like the standard library detection
/// macro for the current target (e.g. [`is_x86_feature_detected`]), but accepts multiple arguments.
/// Invoking it for an architecture other than x86, x86_64, arm, aarch64, powerpc, powerpc64, mips
/// or mips64 produces a compile error.
///
/// When the `std` feature is not enabled, this macro detects if the feature is
/// enabled during compilation, using the [`cfg`] attribute.
///
/// [`is_x86_feature_detected`]: https://doc.rust-lang.org/std/macro.is_x86_feature_detected.html
/// [`cfg`]: https://doc.rust-lang.org/reference/conditional-compilation.html#target_feature
#[cfg(any(feature = "std", doc))]
#[macro_export]
macro_rules! are_cpu_features_detected {
// Single feature: forward to the detection macro of the architecture being compiled for.
{ $feature:tt $(,)? } => {
{
// Every block below is gated on `target_arch`; the block whose `cfg` matches the target is
// the one left after `cfg` stripping and supplies the value of the enclosing block.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{ is_x86_feature_detected!($feature) }
#[cfg(target_arch = "arm")]
{ is_arm_feature_detected!($feature) }
#[cfg(target_arch = "aarch64")]
{ is_aarch64_feature_detected!($feature) }
#[cfg(target_arch = "powerpc")]
{ is_powerpc_feature_detected!($feature) }
#[cfg(target_arch = "powerpc64")]
{ is_powerpc64_feature_detected!($feature) }
#[cfg(target_arch = "mips")]
{ is_mips_feature_detected!($feature) }
#[cfg(target_arch = "mips64")]
{ is_mips64_feature_detected!($feature) }
// None of the architectures above matched: fail the build with an explicit message.
#[cfg(not(any(
target_arch = "x86",
target_arch = "x86_64",
target_arch = "arm",
target_arch = "aarch64",
target_arch = "powerpc",
target_arch = "powerpc64",
target_arch = "mips",
target_arch = "mips64",
)))]
{ compile_error!("Unsupported architecture. Expected x86, x86_64, arm, aarch64, powerpc, powerpc64, mips, or mips64.") }
}
};
// Two or more features: detect each one through the single-feature arm and AND the results.
{ $first:tt, $($features:tt),+ $(,)? } => {
$crate::are_cpu_features_detected!($first) $(&& $crate::are_cpu_features_detected!($features))*
}
}
/// Detects CPU features at compile time.
///
/// This definition is selected when the `std` cargo feature is disabled (and documentation is not
/// being built). It takes one or more feature names as string literals separated by commas; a
/// trailing comma is accepted, matching the variant used when `std` is enabled, so the same
/// invocation compiles under either configuration.
///
/// The expansion is the literal `true` if every listed feature is enabled for the compilation
/// target (as reported by `cfg(target_feature = "...")`), and the literal `false` otherwise. No
/// detection is performed at runtime.
#[cfg(not(any(feature = "std", doc)))]
#[macro_export]
macro_rules! are_cpu_features_detected {
    { $($features:tt),+ $(,)? } => {
        {
            // The two blocks carry complementary `cfg` predicates, so exactly one of them is
            // left after `cfg` stripping and becomes the value of the enclosing block.
            #[cfg(all( $(target_feature = $features),* ))]
            { true }
            #[cfg(not(all( $(target_feature = $features),* )))]
            { false }
        }
    }
}