Skip to content

Commit ee046e0

Browse files
gnzlbg authored and alexcrichton committed
Run-time feature detection for AES-NI and TSC (#312)
* add runtime detection for aes-ni
* formatting and fixing some clippy issues
* add runtime-feature detection for tsc
* fix remaining clippy issues
* manually fix some formatting issues
* increase feature cache size
* use 2x AtomicU32 on 32-bit targets as the feature cache
* use the new cache in stdsimd
1 parent 536013b commit ee046e0

File tree

40 files changed

+1039
-742
lines changed

40 files changed

+1039
-742
lines changed

coresimd/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
#![allow(unused_features)]
1414
#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
1515
simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
16-
const_atomic_usize_new, stmt_expr_attributes, core_intrinsics,
16+
integer_atomics, stmt_expr_attributes, core_intrinsics,
1717
crate_in_paths)]
1818
#![cfg_attr(test, feature(proc_macro, test, attr_literals))]
1919
#![cfg_attr(feature = "cargo-clippy",

coresimd/src/runtime/aarch64.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//! Run-time feature detection on ARM Aarch64.
2-
use runtime::bit;
2+
use runtime::cache;
33
use runtime::arch::HasFeature;
44

55
#[macro_export]
@@ -32,12 +32,12 @@ pub enum __Feature {
3232
pmull,
3333
}
3434

35-
pub fn detect_features<T: HasFeature>(mut x: T) -> usize {
36-
let mut value: usize = 0;
35+
pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
36+
let mut value = cache::Initializer::default();
3737
{
3838
let mut enable_feature = |f| {
3939
if x.has_feature(&f) {
40-
value = bit::set(value, f as u32);
40+
value.set(f as u32);
4141
}
4242
};
4343
enable_feature(__Feature::asimd);

coresimd/src/runtime/arm.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//! Run-time feature detection on ARM Aarch32.
2-
use runtime::bit;
2+
use runtime::cache;
33
use runtime::arch::HasFeature;
44

55
#[macro_export]
@@ -28,12 +28,12 @@ pub enum __Feature {
2828
pmull,
2929
}
3030

31-
pub fn detect_features<T: HasFeature>(mut x: T) -> usize {
32-
let mut value: usize = 0;
31+
pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
32+
let mut value = cache::Initializer::default();
3333
{
3434
let mut enable_feature = |f| {
3535
if x.has_feature(&f) {
36-
value = bit::set(value, f as u32);
36+
value.set(f as u32);
3737
}
3838
};
3939
enable_feature(__Feature::neon);

coresimd/src/runtime/bit.rs

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,7 @@
1-
//! Bit manipulation utilities
2-
3-
/// Sets the `bit` of `x`.
4-
pub const fn set(x: usize, bit: u32) -> usize {
5-
x | 1 << bit
6-
}
1+
//! Bit manipulation utilities.
72
83
/// Tests the `bit` of `x`.
9-
pub const fn test(x: usize, bit: u32) -> bool {
4+
pub fn test(x: usize, bit: u32) -> bool {
5+
debug_assert!(bit < 32, "bit index out-of-bounds");
106
x & (1 << bit) != 0
117
}

coresimd/src/runtime/cache.rs

Lines changed: 129 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,147 @@
1-
//! Cache of run-time feature detection
1+
//! Caches run-time feature detection so that it only needs to be computed
2+
//! once.
23
3-
use core::sync::atomic::{AtomicUsize, Ordering};
4-
use core::usize;
4+
use core::sync::atomic::Ordering;
55

6-
use super::bit;
6+
#[cfg(target_pointer_width = "64")]
7+
use core::sync::atomic::AtomicU64;
78

8-
/// This global variable is a bitset used to cache the features supported by
9-
/// the
10-
/// CPU.
11-
static CACHE: AtomicUsize = AtomicUsize::new(usize::MAX);
9+
#[cfg(target_pointer_width = "32")]
10+
use core::sync::atomic::AtomicU32;
11+
12+
/// Sets the `bit` of `x`.
13+
pub const fn set_bit(x: u64, bit: u32) -> u64 {
14+
x | 1 << bit
15+
}
16+
17+
/// Tests the `bit` of `x`.
18+
pub const fn test_bit(x: u64, bit: u32) -> bool {
19+
x & (1 << bit) != 0
20+
}
21+
22+
/// Maximum number of features that can be cached.
23+
const CACHE_CAPACITY: u32 = 63;
24+
25+
/// This type is used to initialize the cache
26+
pub struct Initializer(u64);
27+
28+
impl Default for Initializer {
29+
fn default() -> Self {
30+
Initializer(0)
31+
}
32+
}
33+
34+
impl Initializer {
35+
/// Tests the `bit` of the cache.
36+
pub fn test(&self, bit: u32) -> bool {
37+
// FIXME: this way of making sure that the cache is large enough is
38+
// brittle.
39+
debug_assert!(
40+
bit < CACHE_CAPACITY,
41+
"too many features, time to increase the cache size!"
42+
);
43+
test_bit(self.0, bit)
44+
}
45+
/// Sets the `bit` of the cache.
46+
pub fn set(&mut self, bit: u32) {
47+
// FIXME: this way of making sure that the cache is large enough is
48+
// brittle.
49+
debug_assert!(
50+
bit < CACHE_CAPACITY,
51+
"too many features, time to increase the cache size!"
52+
);
53+
let v = self.0;
54+
self.0 = set_bit(v, bit);
55+
}
56+
}
57+
58+
/// This global variable is a cache of the features supported by the CPU.
59+
static CACHE: Cache = Cache::uninitialized();
60+
61+
/// Feature cache with capacity for `CACHE_CAPACITY` features.
62+
///
63+
/// Note: the last feature bit is used to represent an
64+
/// uninitialized cache.
65+
#[cfg(target_pointer_width = "64")]
66+
struct Cache(AtomicU64);
67+
68+
#[cfg(target_pointer_width = "64")]
69+
impl Cache {
70+
/// Creates an uninitialized cache.
71+
const fn uninitialized() -> Self {
72+
Cache(AtomicU64::new(u64::max_value()))
73+
}
74+
/// Is the cache uninitialized?
75+
pub fn is_uninitialized(&self) -> bool {
76+
self.0.load(Ordering::Relaxed) == u64::max_value()
77+
}
78+
79+
/// Is the `bit` in the cache set?
80+
pub fn test(&self, bit: u32) -> bool {
81+
test_bit(CACHE.0.load(Ordering::Relaxed), bit)
82+
}
83+
84+
/// Initializes the cache.
85+
pub fn initialize(&self, value: Initializer) {
86+
self.0.store(value.0, Ordering::Relaxed);
87+
}
88+
}
89+
90+
/// Feature cache with capacity for `CACHE_CAPACITY` features.
91+
///
92+
/// Note: the last feature bit is used to represent an
93+
/// uninitialized cache.
94+
#[cfg(target_pointer_width = "32")]
95+
struct Cache(AtomicU32, AtomicU32);
96+
97+
#[cfg(target_pointer_width = "32")]
98+
impl Cache {
99+
/// Creates an uninitialized cache.
100+
const fn uninitialized() -> Self {
101+
Cache(
102+
AtomicU32::new(u32::max_value()),
103+
AtomicU32::new(u32::max_value()),
104+
)
105+
}
106+
/// Is the cache uninitialized?
107+
pub fn is_uninitialized(&self) -> bool {
108+
self.1.load(Ordering::Relaxed) == u32::max_value()
109+
}
110+
111+
/// Is the `bit` in the cache set?
112+
pub fn test(&self, bit: u32) -> bool {
113+
if bit < 32 {
114+
test_bit(CACHE.0.load(Ordering::Relaxed) as u64, bit)
115+
} else {
116+
test_bit(CACHE.1.load(Ordering::Relaxed) as u64, bit - 32)
117+
}
118+
}
119+
120+
/// Initializes the cache.
121+
pub fn initialize(&self, value: Initializer) {
122+
let lo: u32 = value.0 as u32;
123+
let hi: u32 = (value.0 >> 32) as u32;
124+
self.0.store(lo, Ordering::Relaxed);
125+
self.1.store(hi, Ordering::Relaxed);
126+
}
127+
}
12128

13129
/// Test the `bit` of the storage. If the storage has not been initialized,
14130
/// initializes it with the result of `f()`.
15131
///
16132
/// On its first invocation, it detects the CPU features and caches them in the
17-
/// `FEATURES` global variable as an `AtomicUsize`.
133+
/// `CACHE` global variable (an `AtomicU64`, or two `AtomicU32`s on 32-bit targets).
18134
///
19135
/// It uses the `__Feature` variant to index into this variable as a bitset. If
20136
/// the bit is set, the feature is enabled, and otherwise it is disabled.
21137
///
22138
/// PLEASE: do not use this, it is an implementation detail subject to change.
23139
pub fn test<F>(bit: u32, f: F) -> bool
24140
where
25-
F: FnOnce() -> usize,
141+
F: FnOnce() -> Initializer,
26142
{
27-
if CACHE.load(Ordering::Relaxed) == usize::MAX {
28-
CACHE.store(f(), Ordering::Relaxed);
143+
if CACHE.is_uninitialized() {
144+
CACHE.initialize(f());
29145
}
30-
bit::test(CACHE.load(Ordering::Relaxed), bit)
146+
CACHE.test(bit)
31147
}

coresimd/src/runtime/powerpc64.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//! Run-time feature detection on PowerPC64.
2-
use runtime::bit;
2+
use runtime::cache;
33
use runtime::arch::HasFeature;
44

55
#[macro_export]
@@ -33,12 +33,12 @@ pub enum __Feature {
3333
power8,
3434
}
3535

36-
pub fn detect_features<T: HasFeature>(mut x: T) -> usize {
37-
let mut value: usize = 0;
36+
pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
37+
let mut value = cache::Initializer::default();
3838
{
3939
let mut enable_feature = |f| {
4040
if x.has_feature(&f) {
41-
value = bit::set(value, f as u32);
41+
value.set(f as u32);
4242
}
4343
};
4444
enable_feature(__Feature::altivec);

coresimd/src/runtime/x86.rs

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
1919
use core::mem;
2020

21-
use super::bit;
21+
use super::{bit, cache};
2222

2323
/// This macro maps the string-literal feature names to values of the
2424
/// `__Feature` enum at compile-time. The feature names used are the same as
@@ -29,6 +29,12 @@ use super::bit;
2929
#[macro_export]
3030
#[doc(hidden)]
3131
macro_rules! __unstable_detect_feature {
32+
("aes", $unstable_detect_feature:path) => {
33+
$unstable_detect_feature(
34+
$crate::__vendor_runtime::__Feature::aes{}) };
35+
("tsc", $unstable_detect_feature:path) => {
36+
$unstable_detect_feature(
37+
$crate::__vendor_runtime::__Feature::tsc{}) };
3238
("mmx", $unstable_detect_feature:path) => {
3339
$unstable_detect_feature(
3440
$crate::__vendor_runtime::__Feature::mmx{}) };
@@ -168,6 +174,10 @@ macro_rules! __unstable_detect_feature {
168174
#[allow(non_camel_case_types)]
169175
#[repr(u8)]
170176
pub enum __Feature {
177+
/// AES (Advanced Encryption Standard New Instructions AES-NI)
178+
aes,
179+
/// TSC (Time Stamp Counter)
180+
tsc,
171181
/// MMX
172182
mmx,
173183
/// SSE (Streaming SIMD Extensions)
@@ -232,7 +242,8 @@ pub enum __Feature {
232242
xsaves,
233243
/// XSAVEC (Save Processor Extended States Compacted)
234244
xsavec,
235-
#[doc(hidden)] __NonExhaustive,
245+
#[doc(hidden)]
246+
__NonExhaustive,
236247
}
237248

238249
/// Run-time feature detection on x86 works by using the CPUID instruction.
@@ -250,10 +261,10 @@ pub enum __Feature {
250261
/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
251262
/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
252263
/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
253-
pub fn detect_features() -> usize {
264+
pub fn detect_features() -> cache::Initializer {
254265
use vendor::{__cpuid, __cpuid_count, has_cpuid, CpuidResult};
255266
use vendor::_xgetbv;
256-
let mut value: usize = 0;
267+
let mut value = cache::Initializer::default();
257268

258269
// If the x86 CPU does not support the CPUID instruction then it is too
259270
// old to support any of the currently-detectable features.
@@ -329,7 +340,7 @@ pub fn detect_features() -> usize {
329340
// borrows value till the end of this scope:
330341
let mut enable = |r, rb, f| {
331342
if bit::test(r as usize, rb) {
332-
value = bit::set(value, f as u32);
343+
value.set(f as u32);
333344
}
334345
};
335346

@@ -339,8 +350,10 @@ pub fn detect_features() -> usize {
339350
enable(proc_info_ecx, 19, __Feature::sse4_1);
340351
enable(proc_info_ecx, 20, __Feature::sse4_2);
341352
enable(proc_info_ecx, 23, __Feature::popcnt);
342-
enable(proc_info_edx, 24, __Feature::fxsr);
353+
enable(proc_info_ecx, 25, __Feature::aes);
354+
enable(proc_info_edx, 4, __Feature::tsc);
343355
enable(proc_info_edx, 23, __Feature::mmx);
356+
enable(proc_info_edx, 24, __Feature::fxsr);
344357
enable(proc_info_edx, 25, __Feature::sse);
345358
enable(proc_info_edx, 26, __Feature::sse2);
346359

@@ -449,6 +462,8 @@ mod tests {
449462

450463
#[test]
451464
fn dump() {
465+
println!("aes: {:?}", cfg_feature_enabled!("aes"));
466+
println!("tsc: {:?}", cfg_feature_enabled!("tsc"));
452467
println!("sse: {:?}", cfg_feature_enabled!("sse"));
453468
println!("sse2: {:?}", cfg_feature_enabled!("sse2"));
454469
println!("sse3: {:?}", cfg_feature_enabled!("sse3"));
@@ -488,6 +503,8 @@ mod tests {
488503
#[test]
489504
fn compare_with_cupid() {
490505
let information = cupid::master().unwrap();
506+
assert_eq!(cfg_feature_enabled!("aes"), information.aesni());
507+
assert_eq!(cfg_feature_enabled!("tsc"), information.tsc());
491508
assert_eq!(cfg_feature_enabled!("sse"), information.sse());
492509
assert_eq!(cfg_feature_enabled!("sse2"), information.sse2());
493510
assert_eq!(cfg_feature_enabled!("sse3"), information.sse3());

0 commit comments

Comments
 (0)