ring/cpu/intel.rs
1// Copyright 2016-2021 Brian Smith.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15use cfg_if::cfg_if;
16
/// Compile-time assertions about the target that this file takes for granted.
/// Nothing in here exists at run time; a violated assumption is a build error.
mod abi_assumptions {
    use core::mem::size_of;

    // TODO: Support targets that do not have SSE and SSE2 enabled, such as
    // x86_64-unknown-linux-none. See
    // https://github.com/briansmith/ring/issues/1793#issuecomment-1793243725,
    // https://github.com/briansmith/ring/issues/1832,
    // https://github.com/briansmith/ring/issues/1833.
    const _ASSUMES_SSE2: () = {
        assert!(cfg!(target_feature = "sse"));
        assert!(cfg!(target_feature = "sse2"));
    };

    // Pointer width expected for each supported architecture.
    #[cfg(target_arch = "x86_64")]
    const _ASSUMED_POINTER_SIZE: usize = 8;
    #[cfg(target_arch = "x86")]
    const _ASSUMED_POINTER_SIZE: usize = 4;

    // Both `usize` and thin references must be exactly pointer-sized.
    const _ASSUMED_USIZE_SIZE: () = assert!(_ASSUMED_POINTER_SIZE == size_of::<usize>());
    const _ASSUMED_REF_SIZE: () = assert!(_ASSUMED_POINTER_SIZE == size_of::<&'static u8>());

    // x86 and x86_64 are little-endian.
    const _ASSUMED_ENDIANNESS: () = assert!(cfg!(target_endian = "little"));
}
37
38pub(super) mod featureflags {
39 use super::super::CAPS_STATIC;
40 use crate::{
41 cpu,
42 polyfill::{once_cell::race, usize_from_u32},
43 };
44 use core::num::NonZeroUsize;
45
46 pub(in super::super) fn get_or_init() -> cpu::Features {
47 // SAFETY: `OPENSSL_cpuid_setup` must be called only in
48 // `INIT.call_once()` below.
49 prefixed_extern! {
50 fn OPENSSL_cpuid_setup(out: &mut [u32; 4]);
51 }
52
53 let _: NonZeroUsize = FEATURES.get_or_init(|| {
54 let mut cpuid = [0; 4];
55 // SAFETY: We assume that it is safe to execute CPUID and XGETBV.
56 unsafe {
57 OPENSSL_cpuid_setup(&mut cpuid);
58 }
59 let detected = super::cpuid_to_caps_and_set_c_flags(&cpuid);
60 let merged = CAPS_STATIC | detected;
61
62 let merged = usize_from_u32(merged) | (1 << (super::Shift::Initialized as u32));
63 NonZeroUsize::new(merged).unwrap() // Can't fail because we just set a bit.
64 });
65
66 // SAFETY: We initialized the CPU features as required.
67 // `INIT.call_once` has `happens-before` semantics.
68 unsafe { cpu::Features::new_after_feature_flags_written_and_synced_unchecked() }
69 }
70
71 pub(in super::super) fn get(_cpu_features: cpu::Features) -> u32 {
72 // SAFETY: Since only `get_or_init()` could have created
73 // `_cpu_features`, and it only does so after `FEATURES.get_or_init()`,
74 // we know we are reading from `FEATURES` after initializing it.
75 //
76 // Also, 0 means "no features detected" to users, which is designed to
77 // be a safe configuration.
78 let features = FEATURES.get().map(NonZeroUsize::get).unwrap_or(0);
79
80 // The truncation is lossless, as we set the value with a u32.
81 #[allow(clippy::cast_possible_truncation)]
82 let features = features as u32;
83
84 features
85 }
86
87 static FEATURES: race::OnceNonZeroUsize = race::OnceNonZeroUsize::new();
88
89 #[cfg(target_arch = "x86")]
90 #[rustfmt::skip]
91 pub const STATIC_DETECTED: u32 = 0
92 | (if cfg!(target_feature = "sse2") { super::Sse2::mask() } else { 0 })
93 ;
94
95 // Limited to x86_64-v2 features.
96 // TODO: Add missing x86-64-v3 features if we find real-world use of x86-64-v3.
97 // TODO: Add all features we use.
98 #[cfg(target_arch = "x86_64")]
99 #[rustfmt::skip]
100 pub const STATIC_DETECTED: u32 = 0
101 | if cfg!(target_feature = "sse4.1") { super::Sse41::mask() } else { 0 }
102 | if cfg!(target_feature = "ssse3") { super::Ssse3::mask() } else { 0 }
103 ;
104
105 pub const FORCE_DYNAMIC_DETECTION: u32 = 0;
106}
107
108fn cpuid_to_caps_and_set_c_flags(cpuid: &[u32; 4]) -> u32 {
109 // "Intel" citations are for "Intel 64 and IA-32 Architectures Software
110 // Developer’s Manual", Combined Volumes, December 2024.
111 // "AMD" citations are for "AMD64 Technology AMD64 Architecture
112 // Programmer’s Manual, Volumes 1-5" Revision 4.08 April 2024.
113
114 // The `prefixed_extern!` uses below assume this
115 #[cfg(target_arch = "x86_64")]
116 use core::{mem::align_of, sync::atomic::AtomicU32};
117 #[cfg(target_arch = "x86_64")]
118 const _ATOMIC32_ALIGNMENT_EQUALS_U32_ALIGNMENT: () =
119 assert!(align_of::<AtomicU32>() == align_of::<u32>());
120
121 fn check(leaf: u32, bit: u32) -> bool {
122 let shifted = 1 << bit;
123 (leaf & shifted) == shifted
124 }
125 fn set(out: &mut u32, shift: Shift) {
126 let shifted = 1 << (shift as u32);
127 debug_assert_eq!(*out & shifted, 0);
128 *out |= shifted;
129 debug_assert_eq!(*out & shifted, shifted);
130 }
131
132 #[cfg(target_arch = "x86_64")]
133 let is_intel = check(cpuid[0], 30); // Synthesized by `OPENSSL_cpuid_setup`
134
135 // CPUID leaf 1.
136 let leaf1_ecx = cpuid[1];
137
138 // Intel: "Structured Extended Feature Flags Enumeration Leaf"
139 #[cfg(target_arch = "x86_64")]
140 let (extended_features_ebx, extended_features_ecx) = (cpuid[2], cpuid[3]);
141
142 let mut caps = 0;
143
144 // AMD: "Collectively the SSE1, [...] are referred to as the legacy SSE
145 // instructions. All legacy SSE instructions support 128-bit vector
146 // operands."
147
148 // Intel: "11.6.2 Checking for Intel SSE and SSE2 Support"
149 // We have to assume the prerequisites for SSE/SSE2 are met since we're
150 // already almost definitely using SSE registers if these target features
151 // are enabled.
152 //
153 // These also seem to help ensure CMOV support; There doesn't seem to be
154 // a `cfg!(target_feature = "cmov")`. It is likely that removing these
155 // assertions will remove the requirement for CMOV. With our without
156 // CMOV, it is likely that some of our timing side channel prevention does
157 // not work. Presumably the people who delete these are verifying that it
158 // all works fine.
159 const _SSE_REQUIRED: () = assert!(cfg!(target_feature = "sse"));
160 const _SSE2_REQUIRED: () = assert!(cfg!(target_feature = "sse2"));
161
162 #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
163 {
164 // If somebody is trying to compile for an x86 target without SSE2
165 // and they deleted the `_SSE2_REQUIRED` const assertion above then
166 // they're probably trying to support a Linux/BSD/etc. distro that
167 // tries to support ancient x86 systems without SSE/SSE2. Try to
168 // reduce the harm caused, by implementing dynamic feature detection
169 // for them so that most systems will work like normal.
170 //
171 // Note that usually an x86-64 target with SSE2 disabled by default,
172 // usually `-none-` targets, will not support dynamically-detected use
173 // of SIMD registers via CPUID. A whole different mechanism is needed
174 // to support them. Same for i*86-*-none targets.
175 let leaf1_edx = cpuid[0];
176 let sse1_available = check(leaf1_edx, 25);
177 let sse2_available = check(leaf1_edx, 26);
178 if sse1_available && sse2_available {
179 set(&mut caps, Shift::Sse2);
180 }
181 }
182
183 // Sometimes people delete the `_SSE_REQUIRED`/`_SSE2_REQUIRED` const
184 // assertions in an attempt to support pre-SSE2 32-bit x86 systems. If they
185 // do, hopefully they won't delete these redundant assertions, so that
186 // x86_64 isn't affected.
187 #[cfg(target_arch = "x86_64")]
188 const _SSE2_REQUIRED_X86_64: () = assert!(cfg!(target_feature = "sse2"));
189 #[cfg(target_arch = "x86_64")]
190 const _SSE_REQUIRED_X86_64: () = assert!(cfg!(target_feature = "sse2"));
191
192 // Intel: "12.7.2 Checking for SSSE3 Support"
193 // If/when we support dynamic detection of SSE/SSE2, make this conditional
194 // on SSE/SSE2.
195 if check(leaf1_ecx, 9) {
196 set(&mut caps, Shift::Ssse3);
197 }
198
199 // Intel: "12.12.2 Checking for Intel SSE4.1 Support"
200 // If/when we support dynamic detection of SSE/SSE2, make this conditional
201 // on SSE/SSE2.
202 // XXX: We don't check for SSE3 and we're not sure if it is compatible for
203 // us to do so; does AMD advertise SSE3? TODO: address this.
204 // XXX: We don't condition this on SSSE3 being available. TODO: address
205 // this.
206 #[cfg(target_arch = "x86_64")]
207 if check(leaf1_ecx, 19) {
208 set(&mut caps, Shift::Sse41);
209 }
210
211 // AMD: "The extended SSE instructions include [...]."
212
213 // Intel: "14.3 DETECTION OF INTEL AVX INSTRUCTIONS"
214 // `OPENSSL_cpuid_setup` clears this bit when it detects the OS doesn't
215 // support AVX state.
216 let avx_available = check(leaf1_ecx, 28);
217 if avx_available {
218 set(&mut caps, Shift::Avx);
219 }
220
221 #[cfg(target_arch = "x86_64")]
222 if avx_available {
223 // The Intel docs don't seem to document the detection. The instruction
224 // definitions of the VEX.256 instructions reference the
225 // VAES/VPCLMULQDQ features and the documentation for the extended
226 // features gives the values. We combine these into one feature because
227 // we never use them independently.
228 let vaes_available = check(extended_features_ecx, 9);
229 let vclmul_available = check(extended_features_ecx, 10);
230 if vaes_available && vclmul_available {
231 set(&mut caps, Shift::VAesClmul);
232 }
233 }
234
235 // "14.7.1 Detection of Intel AVX2 Hardware support"
236 // XXX: We don't condition AVX2 on AVX. TODO: Address this.
237 // `OPENSSL_cpuid_setup` clears this bit when it detects the OS doesn't
238 // support AVX state.
239 #[cfg(target_arch = "x86_64")]
240 if check(extended_features_ebx, 5) {
241 set(&mut caps, Shift::Avx2);
242
243 // Declared as `uint32_t` in the C code.
244 prefixed_extern! {
245 static avx2_available: AtomicU32;
246 }
247 // SAFETY: The C code only reads `avx2_available`, and its reads are
248 // synchronized through the `OnceNonZeroUsize` Acquire/Release
249 // semantics as we ensure we have a `cpu::Features` instance before
250 // calling into the C code.
251 let flag = unsafe { &avx2_available };
252 flag.store(1, core::sync::atomic::Ordering::Relaxed);
253 }
254
255 // Intel: "12.13.4 Checking for Intel AES-NI Support"
256 // If/when we support dynamic detection of SSE/SSE2, revisit this.
257 // TODO: Clarify "interesting" states like (!SSE && AVX && AES-NI)
258 // and AES-NI & !AVX.
259 // Each check of `ClMul`, `Aes`, and `Sha` must be paired with a check for
260 // an AVX feature (e.g. `Avx`) or an SSE feature (e.g. `Ssse3`), as every
261 // use will either be supported by SSE* or AVX* instructions. We then
262 // assume that those supporting instructions' prerequisites (e.g. OS
263 // support for AVX or SSE state, respectively) are the only prerequisites
264 // for these features.
265 if check(leaf1_ecx, 1) {
266 set(&mut caps, Shift::ClMul);
267 }
268 if check(leaf1_ecx, 25) {
269 set(&mut caps, Shift::Aes);
270 }
271 // See BoringSSL 69c26de93c82ad98daecaec6e0c8644cdf74b03f before enabling
272 // static feature detection for this.
273 #[cfg(target_arch = "x86_64")]
274 if check(extended_features_ebx, 29) {
275 set(&mut caps, Shift::Sha);
276 }
277
278 #[cfg(target_arch = "x86_64")]
279 {
280 if is_intel {
281 set(&mut caps, Shift::IntelCpu);
282 }
283
284 if check(leaf1_ecx, 22) {
285 set(&mut caps, Shift::Movbe);
286 }
287
288 let adx_available = check(extended_features_ebx, 19);
289 if adx_available {
290 set(&mut caps, Shift::Adx);
291 }
292
293 // Some 6th Generation (Skylake) CPUs claim to support BMI1 and BMI2
294 // when they don't; see erratum "SKD052". The Intel document at
295 // https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/6th-gen-core-u-y-spec-update.pdf
296 // contains the footnote "Affects 6th Generation Intel Pentium processor
297 // family and Intel Celeron processor family". Further research indicates
298 // that Skylake Pentium/Celeron do not implement AVX or ADX. It turns
299 // out that we only use BMI1 and BMI2 in combination with ADX and/or
300 // AVX.
301 //
302 // rust `std::arch::is_x86_feature_detected` does a very similar thing
303 // but only looks at AVX, not ADX. Note that they reference an older
304 // version of the erratum labeled SKL052.
305 let believe_bmi_bits = !is_intel || (adx_available || avx_available);
306
307 if check(extended_features_ebx, 3) && believe_bmi_bits {
308 set(&mut caps, Shift::Bmi1);
309 }
310
311 let bmi2_available = check(extended_features_ebx, 8) && believe_bmi_bits;
312 if bmi2_available {
313 set(&mut caps, Shift::Bmi2);
314 }
315
316 if adx_available && bmi2_available {
317 // Declared as `uint32_t` in the C code.
318 prefixed_extern! {
319 static adx_bmi2_available: AtomicU32;
320 }
321 // SAFETY: The C code only reads `adx_bmi2_available`, and its
322 // reads are synchronized through the `OnceNonZeroUsize`
323 // Acquire/Release semantics as we ensure we have a
324 // `cpu::Features` instance before calling into the C code.
325 let flag = unsafe { &adx_bmi2_available };
326 flag.store(1, core::sync::atomic::Ordering::Relaxed);
327 }
328 }
329
330 caps
331}
332
// Declares the CPU features this file knows about. Each entry pairs the list
// of target architectures a feature applies to with the feature's name.
//
// NOTE(review): `impl_get_feature!` is defined outside this file. It
// presumably generates the per-feature types (e.g. `Sse2`, `Ssse3`, `Sse41`,
// whose `mask()` is used in `featureflags`) and the matching `Shift` variants
// used by `cpuid_to_caps_and_set_c_flags` -- confirm against the macro. The
// order of entries may determine bit positions, so do not reorder them without
// checking the macro definition.
impl_get_feature! {
    features: [
        { ("x86_64") => VAesClmul },
        { ("x86", "x86_64") => ClMul },
        { ("x86", "x86_64") => Ssse3 },
        { ("x86_64") => Sse41 },
        { ("x86_64") => Movbe },
        { ("x86", "x86_64") => Aes },
        { ("x86", "x86_64") => Avx },
        { ("x86_64") => Bmi1 },
        { ("x86_64") => Avx2 },
        { ("x86_64") => Bmi2 },
        { ("x86_64") => Adx },
        // See BoringSSL 69c26de93c82ad98daecaec6e0c8644cdf74b03f before enabling
        // static feature detection for this.
        { ("x86_64") => Sha },
        // x86_64 can just assume SSE2 is available.
        { ("x86") => Sse2 },
    ],
}
353
354cfg_if! {
355 if #[cfg(target_arch = "x86_64")] {
356 #[derive(Clone, Copy)]
357 pub(crate) struct IntelCpu(super::Features);
358
359 impl super::GetFeature<IntelCpu> for super::features::Values {
360 fn get_feature(&self) -> Option<IntelCpu> {
361 const MASK: u32 = 1 << (Shift::IntelCpu as u32);
362 if (self.values() & MASK) == MASK {
363 Some(IntelCpu(self.cpu()))
364 } else {
365 None
366 }
367 }
368 }
369 }
370}
371
#[cfg(test)]
mod tests {
    // Only truly ancient x86 hardware lacks SSE2, so this is expected to pass
    // on any x86 machine the tests are realistically run on.
    #[cfg(target_arch = "x86")]
    #[test]
    fn x86_has_sse2() {
        use super::*;
        use crate::cpu::{self, GetFeature as _};

        // The annotation selects the `GetFeature<Sse2>` implementation.
        let detected: Option<Sse2> = cpu::features().get_feature();
        assert!(detected.is_some());
    }
}