Skip to content

Commit 02a8359

Browse files
gnzlbg authored and alexcrichton committed
Run-time feature detection for new AArch64 features (rust-lang#339)
* aarch64 run-time feature detection for latest whitelisted features
* dump new aarch64 features in the run-time detection tests
* add some comments
* remove old code
1 parent 678cbd3 commit 02a8359

File tree

2 files changed

+161
-6
lines changed

2 files changed

+161
-6
lines changed

crates/stdsimd/tests/cpu-detection.rs

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,17 @@ fn arm_linux() {
1919
#[test]
2020
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
2121
fn aarch64_linux() {
22+
println!("fp: {}", is_target_feature_detected!("fp"));
23+
println!("fp16: {}", is_target_feature_detected!("fp16"));
2224
println!("neon: {}", is_target_feature_detected!("neon"));
2325
println!("asimd: {}", is_target_feature_detected!("asimd"));
24-
println!("pmull: {}", is_target_feature_detected!("pmull"));
26+
println!("sve: {}", is_target_feature_detected!("sve"));
27+
println!("crc: {}", is_target_feature_detected!("crc"));
28+
println!("crypto: {}", is_target_feature_detected!("crypto"));
29+
println!("lse: {}", is_target_feature_detected!("lse"));
30+
println!("rdm: {}", is_target_feature_detected!("rdm"));
31+
println!("rcpc: {}", is_target_feature_detected!("rcpc"));
32+
println!("dotprod: {}", is_target_feature_detected!("dotprod"));
2533
}
2634

2735
#[test]

stdsimd/arch/detect/aarch64.rs

Lines changed: 152 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,45 @@ macro_rules! is_target_feature_detected {
1717
("pmull") => {
1818
$crate::arch::detect::check_for($crate::arch::detect::Feature::pmull)
1919
};
20+
("fp") => {
21+
$crate::arch::detect::check_for($crate::arch::detect::Feature::fp)
22+
};
23+
("fp16") => {
24+
$crate::arch::detect::check_for($crate::arch::detect::Feature::fp16)
25+
};
26+
("sve") => {
27+
$crate::arch::detect::check_for($crate::arch::detect::Feature::sve)
28+
};
29+
("crc") => {
30+
$crate::arch::detect::check_for($crate::arch::detect::Feature::crc)
31+
};
32+
("crypto") => {
33+
$crate::arch::detect::check_for($crate::arch::detect::Feature::crypto)
34+
};
35+
("lse") => {
36+
$crate::arch::detect::check_for($crate::arch::detect::Feature::lse)
37+
};
38+
("rdm") => {
39+
$crate::arch::detect::check_for($crate::arch::detect::Feature::rdm)
40+
};
41+
("rcpc") => {
42+
$crate::arch::detect::check_for($crate::arch::detect::Feature::rcpc)
43+
};
44+
("dotprod") => {
45+
$crate::arch::detect::check_for($crate::arch::detect::Feature::dotprod)
46+
};
47+
("ras") => {
48+
compile_error!("\"ras\" feature cannot be detected at run-time")
49+
};
50+
("v8.1a") => {
51+
compile_error!("\"v8.1a\" feature cannot be detected at run-time")
52+
};
53+
("v8.2a") => {
54+
compile_error!("\"v8.2a\" feature cannot be detected at run-time")
55+
};
56+
("v8.3a") => {
57+
compile_error!("\"v8.3a\" feature cannot be detected at run-time")
58+
};
2059
($t:tt) => { compile_error!(concat!("unknown arm target feature: ", $t)) };
2160
}
2261

@@ -28,10 +67,28 @@ macro_rules! is_target_feature_detected {
2867
#[allow(non_camel_case_types)]
2968
#[repr(u8)]
3069
pub enum Feature {
31-
/// ARM Advanced SIMD (ASIMD) - Aarch64
70+
/// ARM Advanced SIMD (ASIMD)
3271
asimd,
3372
/// Polynomial Multiply
3473
pmull,
74+
/// Floating point support
75+
fp,
76+
/// Half-float support.
77+
fp16,
78+
/// Scalable Vector Extension (SVE)
79+
sve,
80+
/// CRC32 (Cyclic Redundancy Check)
81+
crc,
82+
/// Crypto: AES + PMULL + SHA1 + SHA2
83+
crypto,
84+
/// Atomics (Large System Extension)
85+
lse,
86+
/// Rounding Double Multiply (ASIMDRDM)
87+
rdm,
88+
/// Release Consistent processor consistent (RCpc)
89+
rcpc,
90+
/// Vector Dot-Product (ASIMDDP)
91+
dotprod,
3592
}
3693

3794
pub fn detect_features() -> cache::Initializer {
@@ -51,14 +108,104 @@ fn fill_features(value: &mut cache::Initializer) {
51108
//
52109
// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
53110
if let Ok(auxv) = linux::auxv() {
54-
enable_feature(Feature::asimd, bit::test(auxv.hwcap, 1));
55-
enable_feature(Feature::pmull, bit::test(auxv.hwcap, 4));
111+
let fp = bit::test(auxv.hwcap, 0);
112+
let asimd = bit::test(auxv.hwcap, 1);
113+
// let evtstrm = bit::test(auxv.hwcap, 2);
114+
let aes = bit::test(auxv.hwcap, 3);
115+
let pmull = bit::test(auxv.hwcap, 4);
116+
let sha1 = bit::test(auxv.hwcap, 5);
117+
let sha2 = bit::test(auxv.hwcap, 6);
118+
let crc32 = bit::test(auxv.hwcap, 7);
119+
let atomics = bit::test(auxv.hwcap, 8);
120+
let fphp = bit::test(auxv.hwcap, 9);
121+
let asimdhp = bit::test(auxv.hwcap, 10);
122+
// let cpuid = bit::test(auxv.hwcap, 11);
123+
let asimdrdm = bit::test(auxv.hwcap, 12);
124+
// let jscvt = bit::test(auxv.hwcap, 13);
125+
// let fcma = bit::test(auxv.hwcap, 14);
126+
let lrcpc = bit::test(auxv.hwcap, 15);
127+
// let dcpop = bit::test(auxv.hwcap, 16);
128+
// let sha3 = bit::test(auxv.hwcap, 17);
129+
// let sm3 = bit::test(auxv.hwcap, 18);
130+
// let sm4 = bit::test(auxv.hwcap, 19);
131+
let asimddp = bit::test(auxv.hwcap, 20);
132+
// let sha512 = bit::test(auxv.hwcap, 21);
133+
let sve = bit::test(auxv.hwcap, 22);
134+
135+
// The features are enabled approximately like in LLVM host feature detection:
136+
// https://github.com/llvm-mirror/llvm/blob/master/lib/Support/Host.cpp#L1273
137+
138+
enable_feature(Feature::fp, fp);
139+
// Half-float support requires float support
140+
enable_feature(Feature::fp16, fp && fphp);
141+
enable_feature(Feature::pmull, pmull);
142+
enable_feature(Feature::crc, crc32);
143+
enable_feature(Feature::lse, atomics);
144+
enable_feature(Feature::rcpc, lrcpc);
145+
146+
// SIMD support requires float support. If half-floats are supported,
147+
// SIMD support also requires half-float support
148+
let asimd = fp && asimd && (!fphp | asimdhp);
149+
enable_feature(Feature::asimd, asimd);
150+
// SIMD extensions require SIMD support:
151+
enable_feature(Feature::rdm, asimdrdm && asimd);
152+
enable_feature(Feature::dotprod, asimddp && asimd);
153+
enable_feature(Feature::sve, sve && asimd);
154+
155+
// Crypto is specified as AES + PMULL + SHA1 + SHA2 per LLVM's Host.cpp
156+
enable_feature(Feature::crypto, aes && pmull && sha1 && sha2);
56157
return
57158
}
58159

160+
// FIXME: the logic for enabling features should be unified with auxv.
59161
if let Ok(c) = linux::CpuInfo::new() {
60-
enable_feature(Feature::asimd, c.field("Features").has("asimd"));
61-
enable_feature(Feature::pmull, c.field("Features").has("pmull"));
162+
let f = &c.field("Features");
163+
164+
// 64-bit names. FIXME: In 32-bit compatibility mode /proc/cpuinfo will
165+
// map some of the 64-bit names to some 32-bit feature names. This does not
166+
// cover that yet.
167+
let fp = f.has("fp");
168+
let asimd = f.has("asimd");
169+
// let evtstrm = f.has("evtstrm");
170+
let aes = f.has("aes");
171+
let pmull = f.has("pmull");
172+
let sha1 = f.has("sha1");
173+
let sha2 = f.has("sha2");
174+
let crc32 = f.has("crc32");
175+
let atomics = f.has("atomics");
176+
let fphp = f.has("fphp");
177+
let asimdhp = f.has("asimdhp");
178+
// let cpuid = f.has("cpuid");
179+
let asimdrdm = f.has("asimdrdm");
180+
// let jscvt = f.has("jscvt");
181+
// let fcma = f.has("fcma");
182+
let lrcpc = f.has("lrcpc");
183+
// let dcpop = f.has("dcpop");
184+
// let sha3 = f.has("sha3");
185+
// let sm3 = f.has("sm3");
186+
// let sm4 = f.has("sm4");
187+
let asimddp = f.has("asimddp");
188+
// let sha512 = f.has("sha512");
189+
let sve = f.has("sve");
190+
191+
enable_feature(Feature::fp, fp);
192+
enable_feature(Feature::fp16, fp && fphp);
193+
enable_feature(Feature::pmull, pmull);
194+
enable_feature(Feature::crc, crc32);
195+
enable_feature(Feature::lse, atomics);
196+
enable_feature(Feature::rcpc, lrcpc);
197+
198+
let asimd = if fphp {
199+
fp && fphp && asimd && asimdhp
200+
} else {
201+
fp && asimd
202+
};
203+
enable_feature(Feature::asimd, asimd);
204+
enable_feature(Feature::rdm, asimdrdm && asimd);
205+
enable_feature(Feature::dotprod, asimddp && asimd);
206+
enable_feature(Feature::sve, sve && asimd);
207+
208+
enable_feature(Feature::crypto, aes && pmull && sha1 && sha2);
62209
return
63210
}
64211
}

0 commit comments

Comments
 (0)