Skip to content

Commit 8219b29

Browse files
jasondavies authored and alexcrichton committed
Add support for Intel SHA extensions. (rust-lang#395)
1 parent 4126bdc commit 8219b29

File tree

5 files changed

+214
-0
lines changed

5 files changed

+214
-0
lines changed

coresimd/x86/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,3 +597,6 @@ pub use self::aes::*;
597597

598598
mod rdrand;
599599
pub use self::rdrand::*;
600+
601+
mod sha;
602+
pub use self::sha::*;

coresimd/x86/sha.rs

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
use coresimd::simd::*;
2+
use coresimd::x86::*;
3+
use mem;
4+
5+
#[allow(improper_ctypes)]
6+
extern "C" {
7+
#[link_name = "llvm.x86.sha1msg1"]
8+
fn sha1msg1(a: i32x4, b: i32x4) -> i32x4;
9+
#[link_name = "llvm.x86.sha1msg2"]
10+
fn sha1msg2(a: i32x4, b: i32x4) -> i32x4;
11+
#[link_name = "llvm.x86.sha1nexte"]
12+
fn sha1nexte(a: i32x4, b: i32x4) -> i32x4;
13+
#[link_name = "llvm.x86.sha1rnds4"]
14+
fn sha1rnds4(a: i32x4, b: i32x4, c: i8) -> i32x4;
15+
#[link_name = "llvm.x86.sha256msg1"]
16+
fn sha256msg1(a: i32x4, b: i32x4) -> i32x4;
17+
#[link_name = "llvm.x86.sha256msg2"]
18+
fn sha256msg2(a: i32x4, b: i32x4) -> i32x4;
19+
#[link_name = "llvm.x86.sha256rnds2"]
20+
fn sha256rnds2(a: i32x4, b: i32x4, k: i32x4) -> i32x4;
21+
}
22+
23+
#[cfg(test)]
24+
use stdsimd_test::assert_instr;
25+
26+
/// Perform an intermediate calculation for the next four SHA1 message values
27+
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
28+
/// and returning the result.
29+
#[inline]
30+
#[target_feature(enable = "sha")]
31+
#[cfg_attr(test, assert_instr(sha1msg1))]
32+
pub unsafe fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
33+
mem::transmute(sha1msg1(a.as_i32x4(), b.as_i32x4()))
34+
}
35+
36+
/// Perform the final calculation for the next four SHA1 message values
37+
/// (unsigned 32-bit integers) using the intermediate result in `a` and the
38+
/// previous message values in `b`, and returns the result.
39+
#[inline]
40+
#[target_feature(enable = "sha")]
41+
#[cfg_attr(test, assert_instr(sha1msg2))]
42+
pub unsafe fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
43+
mem::transmute(sha1msg2(a.as_i32x4(), b.as_i32x4()))
44+
}
45+
46+
/// Calculate SHA1 state variable E after four rounds of operation from the
47+
/// current SHA1 state variable `a`, add that value to the scheduled values
48+
/// (unsigned 32-bit integers) in `b`, and returns the result.
49+
#[inline]
50+
#[target_feature(enable = "sha")]
51+
#[cfg_attr(test, assert_instr(sha1nexte))]
52+
pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i {
53+
mem::transmute(sha1nexte(a.as_i32x4(), b.as_i32x4()))
54+
}
55+
56+
/// Perform four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D)
57+
/// from `a` and some pre-computed sum of the next 4 round message values
58+
/// (unsigned 32-bit integers), and state variable E from `b`, and return the
59+
/// updated SHA1 state (A,B,C,D). `func` contains the logic functions and round
60+
/// constants.
61+
#[inline]
62+
#[target_feature(enable = "sha")]
63+
#[cfg_attr(test, assert_instr(sha1rnds4, func = 0))]
64+
#[rustc_args_required_const(2)]
65+
pub unsafe fn _mm_sha1rnds4_epu32(a: __m128i, b: __m128i, func: i32) -> __m128i {
66+
let a = a.as_i32x4();
67+
let b = b.as_i32x4();
68+
macro_rules! call {
69+
($imm2:expr) => { sha1rnds4(a, b, $imm2) }
70+
}
71+
let ret = constify_imm2!(func, call);
72+
mem::transmute(ret)
73+
}
74+
75+
/// Perform an intermediate calculation for the next four SHA256 message values
76+
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
77+
/// and return the result.
78+
#[inline]
79+
#[target_feature(enable = "sha")]
80+
#[cfg_attr(test, assert_instr(sha256msg1))]
81+
pub unsafe fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
82+
mem::transmute(sha256msg1(a.as_i32x4(), b.as_i32x4()))
83+
}
84+
85+
/// Perform the final calculation for the next four SHA256 message values
86+
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
87+
/// and return the result.
88+
#[inline]
89+
#[target_feature(enable = "sha")]
90+
#[cfg_attr(test, assert_instr(sha256msg2))]
91+
pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
92+
mem::transmute(sha256msg2(a.as_i32x4(), b.as_i32x4()))
93+
}
94+
95+
/// Perform 2 rounds of SHA256 operation using an initial SHA256 state (C,D,G,H)
96+
/// from `a`, an initial SHA256 state (A,B,E,F) from `b`, and a pre-computed sum
97+
/// of the next 2 round message values (unsigned 32-bit integers) and the
98+
/// corresponding round constants from `k`, and store the updated SHA256 state
99+
/// (A,B,E,F) in dst.
100+
#[inline]
101+
#[target_feature(enable = "sha")]
102+
#[cfg_attr(test, assert_instr(sha256rnds2))]
103+
pub unsafe fn _mm_sha256rnds2_epu32 (a: __m128i, b: __m128i, k: __m128i) -> __m128i {
104+
mem::transmute(sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4()))
105+
}
106+
/// Known-answer tests for the SHA intrinsics: each test feeds fixed 128-bit
/// inputs to one intrinsic and compares the output against a fixed expected
/// vector.
#[cfg(test)]
mod tests {
    // Only the x86 intrinsics/test helpers and the `simd_test` attribute are
    // needed by the tests below.
    use coresimd::x86::*;
    use stdsimd_test::simd_test;

    // NOTE(review): every test allows `overflowing_literals`, presumably
    // because several of the 64-bit hex constants have their top bit set and
    // are intended as raw bit patterns for `_mm_set_epi64x` -- confirm.

    #[simd_test = "sha"]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1msg1_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0x98829f34f74ad457, 0xda2b1a44d0b5ad3c);
        let r = _mm_sha1msg1_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "sha"]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1msg2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xf714b202d863d47d, 0x90c30d946b3d3b35);
        let r = _mm_sha1msg2_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "sha"]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1nexte_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0x2589d5be923f82a4, 0x59f111f13956c25b);
        let r = _mm_sha1nexte_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "sha"]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1rnds4_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);

        // The selector is passed as a literal in each call (the intrinsic is
        // marked `rustc_args_required_const`), so the four cases are spelled
        // out rather than looped over.
        let expected = _mm_set_epi64x(0x32b13cd8322f5268, 0xc54420862bd9246f);
        let r = _mm_sha1rnds4_epu32(a, b, 0);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0x6d4c43e56a3c25d9, 0xa7e00fb775cbd3fe);
        let r = _mm_sha1rnds4_epu32(a, b, 1);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0xb304e383c01222f4, 0x66f6b3b1f89d8001);
        let r = _mm_sha1rnds4_epu32(a, b, 2);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0x8189b758bfabfa79, 0xdb08f6e78cae098b);
        let r = _mm_sha1rnds4_epu32(a, b, 3);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "sha"]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256msg1_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xeb84973fd5cda67d, 0x2857b88f406b09ee);
        let r = _mm_sha256msg1_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "sha"]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256msg2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xb58777ce887fd851, 0x15d1ec8b73ac8450);
        let r = _mm_sha256msg2_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "sha"]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256rnds2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let k = _mm_set_epi64x(0, 0x12835b01d807aa98);
        let expected = _mm_set_epi64x(0xd3063037effb15ea, 0x187ee3db0d6d1d19);
        let r = _mm_sha256rnds2_epu32(a, b, k);
        assert_eq_m128i(r, expected);
    }
}

crates/stdsimd/tests/cpu-detection.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ fn x86_all() {
7171
is_x86_feature_detected!("sse4.2")
7272
);
7373
println!("sse4a: {:?}", is_x86_feature_detected!("sse4a"));
74+
println!("sha: {:?}", is_x86_feature_detected!("sha"));
7475
println!("avx: {:?}", is_x86_feature_detected!("avx"));
7576
println!("avx2: {:?}", is_x86_feature_detected!("avx2"));
7677
println!(

stdsimd/arch/detect/arch/x86.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ macro_rules! is_x86_feature_detected {
6464
cfg!(target_feature = "sse4a") || $crate::arch::detect::check_for(
6565
$crate::arch::detect::Feature::sse4a)
6666
};
67+
("sha") => {
68+
cfg!(target_feature = "sha") || $crate::arch::detect::check_for(
69+
$crate::arch::detect::Feature::sha)
70+
};
6771
("avx") => {
6872
cfg!(target_feature = "avx") || $crate::arch::detect::check_for(
6973
$crate::arch::detect::Feature::avx)
@@ -199,6 +203,8 @@ pub enum Feature {
199203
sse4_2,
200204
/// SSE4a (Streaming SIMD Extensions 4a)
201205
sse4a,
206+
/// SHA
207+
sha,
202208
/// AVX (Advanced Vector Extensions)
203209
avx,
204210
/// AVX2 (Advanced Vector Extensions 2)

stdsimd/arch/detect/os/x86.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ pub fn detect_features() -> cache::Initializer {
131131
enable(proc_info_edx, 24, Feature::fxsr);
132132
enable(proc_info_edx, 25, Feature::sse);
133133
enable(proc_info_edx, 26, Feature::sse2);
134+
enable(extended_features_ebx, 29, Feature::sha);
134135

135136
enable(extended_features_ebx, 3, Feature::bmi);
136137
enable(extended_features_ebx, 8, Feature::bmi2);
@@ -249,6 +250,7 @@ mod tests {
249250
println!("sse4.1: {:?}", is_x86_feature_detected!("sse4.1"));
250251
println!("sse4.2: {:?}", is_x86_feature_detected!("sse4.2"));
251252
println!("sse4a: {:?}", is_x86_feature_detected!("sse4a"));
253+
println!("sha: {:?}", is_x86_feature_detected!("sha"));
252254
println!("avx: {:?}", is_x86_feature_detected!("avx"));
253255
println!("avx2: {:?}", is_x86_feature_detected!("avx2"));
254256
println!("avx512f {:?}", is_x86_feature_detected!("avx512f"));
@@ -293,6 +295,7 @@ mod tests {
293295
assert_eq!(is_x86_feature_detected!("sse4.1"), information.sse4_1());
294296
assert_eq!(is_x86_feature_detected!("sse4.2"), information.sse4_2());
295297
assert_eq!(is_x86_feature_detected!("sse4a"), information.sse4a());
298+
assert_eq!(is_x86_feature_detected!("sha"), information.sha());
296299
assert_eq!(is_x86_feature_detected!("avx"), information.avx());
297300
assert_eq!(is_x86_feature_detected!("avx2"), information.avx2());
298301
assert_eq!(is_x86_feature_detected!("avx512f"), information.avx512f());

0 commit comments

Comments
 (0)