ring/cpu/
intel.rs

1// Copyright 2016-2021 Brian Smith.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15use cfg_if::cfg_if;
16
// Compile-time assertions about the target this file is built for. Nothing
// in here exists at runtime; a violated assumption is a build failure.
mod abi_assumptions {
    use core::mem;

    // TODO: Support targets that do not have SSE and SSE2 enabled, such as
    // x86_64-unknown-linux-none. See
    // https://github.com/briansmith/ring/issues/1793#issuecomment-1793243725,
    // https://github.com/briansmith/ring/issues/1832,
    // https://github.com/briansmith/ring/issues/1833.
    const _ASSUMES_SSE2: () =
        assert!(cfg!(all(target_feature = "sse", target_feature = "sse2")));

    // Pointer width, in bytes, expected for each supported architecture.
    #[cfg(target_arch = "x86")]
    const _ASSUMED_POINTER_SIZE: usize = 4;
    #[cfg(target_arch = "x86_64")]
    const _ASSUMED_POINTER_SIZE: usize = 8;

    // Both `usize` and references must be exactly one pointer wide.
    const _ASSUMED_USIZE_SIZE: () = assert!(_ASSUMED_POINTER_SIZE == mem::size_of::<usize>());
    const _ASSUMED_REF_SIZE: () =
        assert!(_ASSUMED_POINTER_SIZE == mem::size_of::<&'static u8>());

    // Only little-endian targets are expected here.
    const _ASSUMED_ENDIANNESS: () = assert!(cfg!(target_endian = "little"));
}
37
38pub(super) mod featureflags {
39    use super::super::CAPS_STATIC;
40    use crate::{
41        cpu,
42        polyfill::{once_cell::race, usize_from_u32},
43    };
44    use core::num::NonZeroUsize;
45
46    pub(in super::super) fn get_or_init() -> cpu::Features {
47        // SAFETY: `OPENSSL_cpuid_setup` must be called only in
48        // `INIT.call_once()` below.
49        prefixed_extern! {
50            fn OPENSSL_cpuid_setup(out: &mut [u32; 4]);
51        }
52
53        let _: NonZeroUsize = FEATURES.get_or_init(|| {
54            let mut cpuid = [0; 4];
55            // SAFETY: We assume that it is safe to execute CPUID and XGETBV.
56            unsafe {
57                OPENSSL_cpuid_setup(&mut cpuid);
58            }
59            let detected = super::cpuid_to_caps_and_set_c_flags(&cpuid);
60            let merged = CAPS_STATIC | detected;
61
62            let merged = usize_from_u32(merged) | (1 << (super::Shift::Initialized as u32));
63            NonZeroUsize::new(merged).unwrap() // Can't fail because we just set a bit.
64        });
65
66        // SAFETY: We initialized the CPU features as required.
67        // `INIT.call_once` has `happens-before` semantics.
68        unsafe { cpu::Features::new_after_feature_flags_written_and_synced_unchecked() }
69    }
70
71    pub(in super::super) fn get(_cpu_features: cpu::Features) -> u32 {
72        // SAFETY: Since only `get_or_init()` could have created
73        // `_cpu_features`, and it only does so after `FEATURES.get_or_init()`,
74        // we know we are reading from `FEATURES` after initializing it.
75        //
76        // Also, 0 means "no features detected" to users, which is designed to
77        // be a safe configuration.
78        let features = FEATURES.get().map(NonZeroUsize::get).unwrap_or(0);
79
80        // The truncation is lossless, as we set the value with a u32.
81        #[allow(clippy::cast_possible_truncation)]
82        let features = features as u32;
83
84        features
85    }
86
87    static FEATURES: race::OnceNonZeroUsize = race::OnceNonZeroUsize::new();
88
89    #[cfg(target_arch = "x86")]
90    #[rustfmt::skip]
91    pub const STATIC_DETECTED: u32 = 0
92        | (if cfg!(target_feature = "sse2") { super::Sse2::mask() } else { 0 })
93        ;
94
95    // Limited to x86_64-v2 features.
96    // TODO: Add missing x86-64-v3 features if we find real-world use of x86-64-v3.
97    // TODO: Add all features we use.
98    #[cfg(target_arch = "x86_64")]
99    #[rustfmt::skip]
100    pub const STATIC_DETECTED: u32 = 0
101        | if cfg!(target_feature = "sse4.1") { super::Sse41::mask() } else { 0 }
102        | if cfg!(target_feature = "ssse3") { super::Ssse3::mask() } else { 0 }
103        ;
104
105    pub const FORCE_DYNAMIC_DETECTION: u32 = 0;
106}
107
108fn cpuid_to_caps_and_set_c_flags(cpuid: &[u32; 4]) -> u32 {
109    // "Intel" citations are for "Intel 64 and IA-32 Architectures Software
110    // Developer’s Manual", Combined Volumes, December 2024.
111    // "AMD" citations are for "AMD64 Technology AMD64 Architecture
112    // Programmer’s Manual, Volumes 1-5" Revision 4.08 April 2024.
113
114    // The `prefixed_extern!` uses below assume this
115    #[cfg(target_arch = "x86_64")]
116    use core::{mem::align_of, sync::atomic::AtomicU32};
117    #[cfg(target_arch = "x86_64")]
118    const _ATOMIC32_ALIGNMENT_EQUALS_U32_ALIGNMENT: () =
119        assert!(align_of::<AtomicU32>() == align_of::<u32>());
120
121    fn check(leaf: u32, bit: u32) -> bool {
122        let shifted = 1 << bit;
123        (leaf & shifted) == shifted
124    }
125    fn set(out: &mut u32, shift: Shift) {
126        let shifted = 1 << (shift as u32);
127        debug_assert_eq!(*out & shifted, 0);
128        *out |= shifted;
129        debug_assert_eq!(*out & shifted, shifted);
130    }
131
132    #[cfg(target_arch = "x86_64")]
133    let is_intel = check(cpuid[0], 30); // Synthesized by `OPENSSL_cpuid_setup`
134
135    // CPUID leaf 1.
136    let leaf1_ecx = cpuid[1];
137
138    // Intel: "Structured Extended Feature Flags Enumeration Leaf"
139    #[cfg(target_arch = "x86_64")]
140    let (extended_features_ebx, extended_features_ecx) = (cpuid[2], cpuid[3]);
141
142    let mut caps = 0;
143
144    // AMD: "Collectively the SSE1, [...] are referred to as the legacy SSE
145    // instructions. All legacy SSE instructions support 128-bit vector
146    // operands."
147
148    // Intel: "11.6.2 Checking for Intel SSE and SSE2 Support"
149    // We have to assume the prerequisites for SSE/SSE2 are met since we're
150    // already almost definitely using SSE registers if these target features
151    // are enabled.
152    //
153    // These also seem to help ensure CMOV support; There doesn't seem to be
154    // a `cfg!(target_feature = "cmov")`. It is likely that removing these
155    // assertions will remove the requirement for CMOV. With our without
156    // CMOV, it is likely that some of our timing side channel prevention does
157    // not work. Presumably the people who delete these are verifying that it
158    // all works fine.
159    const _SSE_REQUIRED: () = assert!(cfg!(target_feature = "sse"));
160    const _SSE2_REQUIRED: () = assert!(cfg!(target_feature = "sse2"));
161
162    #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
163    {
164        // If somebody is trying to compile for an x86 target without SSE2
165        // and they deleted the `_SSE2_REQUIRED` const assertion above then
166        // they're probably trying to support a Linux/BSD/etc. distro that
167        // tries to support ancient x86 systems without SSE/SSE2. Try to
168        // reduce the harm caused, by implementing dynamic feature detection
169        // for them so that most systems will work like normal.
170        //
171        // Note that usually an x86-64 target with SSE2 disabled by default,
172        // usually `-none-` targets, will not support dynamically-detected use
173        // of SIMD registers via CPUID. A whole different mechanism is needed
174        // to support them. Same for i*86-*-none targets.
175        let leaf1_edx = cpuid[0];
176        let sse1_available = check(leaf1_edx, 25);
177        let sse2_available = check(leaf1_edx, 26);
178        if sse1_available && sse2_available {
179            set(&mut caps, Shift::Sse2);
180        }
181    }
182
183    // Sometimes people delete the `_SSE_REQUIRED`/`_SSE2_REQUIRED` const
184    // assertions in an attempt to support pre-SSE2 32-bit x86 systems. If they
185    // do, hopefully they won't delete these redundant assertions, so that
186    // x86_64 isn't affected.
187    #[cfg(target_arch = "x86_64")]
188    const _SSE2_REQUIRED_X86_64: () = assert!(cfg!(target_feature = "sse2"));
189    #[cfg(target_arch = "x86_64")]
190    const _SSE_REQUIRED_X86_64: () = assert!(cfg!(target_feature = "sse2"));
191
192    // Intel: "12.7.2 Checking for SSSE3 Support"
193    // If/when we support dynamic detection of SSE/SSE2, make this conditional
194    // on SSE/SSE2.
195    if check(leaf1_ecx, 9) {
196        set(&mut caps, Shift::Ssse3);
197    }
198
199    // Intel: "12.12.2 Checking for Intel SSE4.1 Support"
200    // If/when we support dynamic detection of SSE/SSE2, make this conditional
201    // on SSE/SSE2.
202    // XXX: We don't check for SSE3 and we're not sure if it is compatible for
203    //      us to do so; does AMD advertise SSE3? TODO: address this.
204    // XXX: We don't condition this on SSSE3 being available. TODO: address
205    //      this.
206    #[cfg(target_arch = "x86_64")]
207    if check(leaf1_ecx, 19) {
208        set(&mut caps, Shift::Sse41);
209    }
210
211    // AMD: "The extended SSE instructions include [...]."
212
213    // Intel: "14.3 DETECTION OF INTEL AVX INSTRUCTIONS"
214    // `OPENSSL_cpuid_setup` clears this bit when it detects the OS doesn't
215    // support AVX state.
216    let avx_available = check(leaf1_ecx, 28);
217    if avx_available {
218        set(&mut caps, Shift::Avx);
219    }
220
221    #[cfg(target_arch = "x86_64")]
222    if avx_available {
223        // The Intel docs don't seem to document the detection. The instruction
224        // definitions of the VEX.256 instructions reference the
225        // VAES/VPCLMULQDQ features and the documentation for the extended
226        // features gives the values. We combine these into one feature because
227        // we never use them independently.
228        let vaes_available = check(extended_features_ecx, 9);
229        let vclmul_available = check(extended_features_ecx, 10);
230        if vaes_available && vclmul_available {
231            set(&mut caps, Shift::VAesClmul);
232        }
233    }
234
235    // "14.7.1 Detection of Intel AVX2 Hardware support"
236    // XXX: We don't condition AVX2 on AVX. TODO: Address this.
237    // `OPENSSL_cpuid_setup` clears this bit when it detects the OS doesn't
238    // support AVX state.
239    #[cfg(target_arch = "x86_64")]
240    if check(extended_features_ebx, 5) {
241        set(&mut caps, Shift::Avx2);
242
243        // Declared as `uint32_t` in the C code.
244        prefixed_extern! {
245            static avx2_available: AtomicU32;
246        }
247        // SAFETY: The C code only reads `avx2_available`, and its reads are
248        // synchronized through the `OnceNonZeroUsize` Acquire/Release
249        // semantics as we ensure we have a `cpu::Features` instance before
250        // calling into the C code.
251        let flag = unsafe { &avx2_available };
252        flag.store(1, core::sync::atomic::Ordering::Relaxed);
253    }
254
255    // Intel: "12.13.4 Checking for Intel AES-NI Support"
256    // If/when we support dynamic detection of SSE/SSE2, revisit this.
257    // TODO: Clarify "interesting" states like (!SSE && AVX && AES-NI)
258    // and AES-NI & !AVX.
259    // Each check of `ClMul`, `Aes`, and `Sha` must be paired with a check for
260    // an AVX feature (e.g. `Avx`) or an SSE feature (e.g. `Ssse3`), as every
261    // use will either be supported by SSE* or AVX* instructions. We then
262    // assume that those supporting instructions' prerequisites (e.g. OS
263    // support for AVX or SSE state, respectively) are the only prerequisites
264    // for these features.
265    if check(leaf1_ecx, 1) {
266        set(&mut caps, Shift::ClMul);
267    }
268    if check(leaf1_ecx, 25) {
269        set(&mut caps, Shift::Aes);
270    }
271    // See BoringSSL 69c26de93c82ad98daecaec6e0c8644cdf74b03f before enabling
272    // static feature detection for this.
273    #[cfg(target_arch = "x86_64")]
274    if check(extended_features_ebx, 29) {
275        set(&mut caps, Shift::Sha);
276    }
277
278    #[cfg(target_arch = "x86_64")]
279    {
280        if is_intel {
281            set(&mut caps, Shift::IntelCpu);
282        }
283
284        if check(leaf1_ecx, 22) {
285            set(&mut caps, Shift::Movbe);
286        }
287
288        let adx_available = check(extended_features_ebx, 19);
289        if adx_available {
290            set(&mut caps, Shift::Adx);
291        }
292
293        // Some 6th Generation (Skylake) CPUs claim to support BMI1 and BMI2
294        // when they don't; see erratum "SKD052". The Intel document at
295        // https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/6th-gen-core-u-y-spec-update.pdf
296        // contains the footnote "Affects 6th Generation Intel Pentium processor
297        // family and Intel Celeron processor family". Further research indicates
298        // that Skylake Pentium/Celeron do not implement AVX or ADX. It turns
299        // out that we only use BMI1 and BMI2 in combination with ADX and/or
300        // AVX.
301        //
302        // rust `std::arch::is_x86_feature_detected` does a very similar thing
303        // but only looks at AVX, not ADX. Note that they reference an older
304        // version of the erratum labeled SKL052.
305        let believe_bmi_bits = !is_intel || (adx_available || avx_available);
306
307        if check(extended_features_ebx, 3) && believe_bmi_bits {
308            set(&mut caps, Shift::Bmi1);
309        }
310
311        let bmi2_available = check(extended_features_ebx, 8) && believe_bmi_bits;
312        if bmi2_available {
313            set(&mut caps, Shift::Bmi2);
314        }
315
316        if adx_available && bmi2_available {
317            // Declared as `uint32_t` in the C code.
318            prefixed_extern! {
319                static adx_bmi2_available: AtomicU32;
320            }
321            // SAFETY: The C code only reads `adx_bmi2_available`, and its
322            // reads are synchronized through the `OnceNonZeroUsize`
323            // Acquire/Release semantics as we ensure we have a
324            // `cpu::Features` instance before calling into the C code.
325            let flag = unsafe { &adx_bmi2_available };
326            flag.store(1, core::sync::atomic::Ordering::Relaxed);
327        }
328    }
329
330    caps
331}
332
// Declares the CPU features this file can detect, each tagged with the
// architecture(s) it applies to. The names listed here are the ones used as
// `Shift::<Name>` and `<Name>::mask()` elsewhere in this file.
// NOTE(review): `impl_get_feature!` is defined outside this file; presumably
// it generates the `Shift` enum and one type per feature. `Shift::IntelCpu`
// and `Shift::Initialized` are used in this file but not listed here, so they
// presumably come from the macro itself. The entry order may determine bit
// positions; confirm against the macro before reordering.
impl_get_feature! {
    features: [
        { ("x86_64") => VAesClmul },
        { ("x86", "x86_64") => ClMul },
        { ("x86", "x86_64") => Ssse3 },
        { ("x86_64") => Sse41 },
        { ("x86_64") => Movbe },
        { ("x86", "x86_64") => Aes },
        { ("x86", "x86_64") => Avx },
        { ("x86_64") => Bmi1 },
        { ("x86_64") => Avx2 },
        { ("x86_64") => Bmi2 },
        { ("x86_64") => Adx },
        // See BoringSSL 69c26de93c82ad98daecaec6e0c8644cdf74b03f before enabling
        // static feature detection for this.
        { ("x86_64") => Sha },
        // x86_64 can just assume SSE2 is available.
        { ("x86") => Sse2 },
    ],
}
353
354cfg_if! {
355    if #[cfg(target_arch = "x86_64")] {
356        #[derive(Clone, Copy)]
357        pub(crate) struct IntelCpu(super::Features);
358
359        impl super::GetFeature<IntelCpu> for super::features::Values {
360            fn get_feature(&self) -> Option<IntelCpu> {
361                const MASK: u32 = 1 << (Shift::IntelCpu as u32);
362                if (self.values() & MASK) == MASK {
363                    Some(IntelCpu(self.cpu()))
364                } else {
365                    None
366                }
367            }
368        }
369    }
370}
371
#[cfg(test)]
mod tests {
    // Only very, very old x86 systems lack SSE2, so this is expected to pass
    // on essentially any x86 machine the tests run on.
    #[cfg(target_arch = "x86")]
    #[test]
    fn x86_has_sse2() {
        use super::*;
        use crate::cpu::{self, GetFeature as _};

        // The annotation selects the `Sse2` feature lookup.
        let sse2: Option<Sse2> = cpu::features().get_feature();
        assert!(sse2.is_some());
    }
}
382}