Skip to content

Commit 52ccd73

Browse files
committed
added VAES intrinsics
This adds support for the x86-64 VAES (vectorized AES) intrinsics. It adds in both the full-width and the VL'ed 256-bit versions and ensures via tests that they match the same test vectors as the AES-NI versions and that they match (though sadly w/o quickcheck validation for now)
1 parent 9b9a7d5 commit 52ccd73

File tree

2 files changed

+331
-0
lines changed

2 files changed

+331
-0
lines changed
Lines changed: 328 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,328 @@
1+
//! Vectorized AES Instructions (VAES)
2+
//!
3+
//! The intrinsics here correspond to those in the `immintrin.h` C header.
4+
//!
5+
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
6+
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
7+
//!
8+
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
9+
10+
use crate::core_arch::x86::__m512i;
11+
use crate::core_arch::x86::__m256i;
12+
13+
#[cfg(test)]
14+
use stdarch_test::assert_instr;
15+
16+
#[allow(improper_ctypes)]
17+
extern "C" {
18+
#[link_name = "llvm.x86.aesni.aesenc.256"]
19+
fn aesenc_256(a: __m256i, round_key: __m256i) -> __m256i;
20+
#[link_name = "llvm.x86.aesni.aesenclast.256"]
21+
fn aesenclast_256(a: __m256i, round_key: __m256i) -> __m256i;
22+
#[link_name = "llvm.x86.aesni.aesdec.256"]
23+
fn aesdec_256(a: __m256i, round_key: __m256i) -> __m256i;
24+
#[link_name = "llvm.x86.aesni.aesdeclast.256"]
25+
fn aesdeclast_256(a: __m256i, round_key: __m256i) -> __m256i;
26+
#[link_name = "llvm.x86.aesni.aesenc.512"]
27+
fn aesenc_512(a: __m512i, round_key: __m512i) -> __m512i;
28+
#[link_name = "llvm.x86.aesni.aesenclast.512"]
29+
fn aesenclast_512(a: __m512i, round_key: __m512i) -> __m512i;
30+
#[link_name = "llvm.x86.aesni.aesdec.512"]
31+
fn aesdec_512(a: __m512i, round_key: __m512i) -> __m512i;
32+
#[link_name = "llvm.x86.aesni.aesdeclast.512"]
33+
fn aesdeclast_512(a: __m512i, round_key: __m512i) -> __m512i;
34+
}
35+
36+
/// Performs one round of an AES encryption flow on each 128-bit word (state) in `a` using
37+
/// the corresponding 128-bit word (key) in `round_key`.
38+
///
39+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesenc_epi128)
40+
#[inline]
41+
#[target_feature(enable = "avx512vaes,avx512vl")]
42+
#[cfg_attr(test, assert_instr(vaesenc))]
43+
pub unsafe fn _mm256_aesenc_epi128(a: __m256i, round_key: __m256i) -> __m256i {
44+
aesenc_256(a, round_key)
45+
}
46+
47+
/// Performs the last round of an AES encryption flow on each 128-bit word (state) in `a` using
48+
/// the corresponding 128-bit word (key) in `round_key`.
49+
///
50+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesenclast_epi128)
51+
#[inline]
52+
#[target_feature(enable = "avx512vaes,avx512vl")]
53+
#[cfg_attr(test, assert_instr(vaesenclast))]
54+
pub unsafe fn _mm256_aesenclast_epi128(a: __m256i, round_key: __m256i) -> __m256i {
55+
aesenclast_256(a, round_key)
56+
}
57+
58+
/// Performs one round of an AES decryption flow on each 128-bit word (state) in `a` using
59+
/// the corresponding 128-bit word (key) in `round_key`.
60+
///
61+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesdec_epi128)
62+
#[inline]
63+
#[target_feature(enable = "avx512vaes,avx512vl")]
64+
#[cfg_attr(test, assert_instr(vaesdec))]
65+
pub unsafe fn _mm256_aesdec_epi128(a: __m256i, round_key: __m256i) -> __m256i {
66+
aesdec_256(a, round_key)
67+
}
68+
69+
/// Performs the last round of an AES decryption flow on each 128-bit word (state) in `a` using
70+
/// the corresponding 128-bit word (key) in `round_key`.
71+
///
72+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesdeclast_epi128)
73+
#[inline]
74+
#[target_feature(enable = "avx512vaes,avx512vl")]
75+
#[cfg_attr(test, assert_instr(vaesdeclast))]
76+
pub unsafe fn _mm256_aesdeclast_epi128(a: __m256i, round_key: __m256i) -> __m256i {
77+
aesdeclast_256(a, round_key)
78+
}
79+
80+
81+
/// Performs one round of an AES encryption flow on each 128-bit word (state) in `a` using
82+
/// the corresponding 128-bit word (key) in `round_key`.
83+
///
84+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesenc_epi128)
85+
#[inline]
86+
#[target_feature(enable = "avx512vaes,avx512f")]
87+
#[cfg_attr(test, assert_instr(vaesenc))]
88+
pub unsafe fn _mm512_aesenc_epi128(a: __m512i, round_key: __m512i) -> __m512i {
89+
aesenc_512(a, round_key)
90+
}
91+
92+
/// Performs the last round of an AES encryption flow on each 128-bit word (state) in `a` using
93+
/// the corresponding 128-bit word (key) in `round_key`.
94+
///
95+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesenclast_epi128)
96+
#[inline]
97+
#[target_feature(enable = "avx512vaes,avx512f")]
98+
#[cfg_attr(test, assert_instr(vaesenclast))]
99+
pub unsafe fn _mm512_aesenclast_epi128(a: __m512i, round_key: __m512i) -> __m512i {
100+
aesenclast_512(a, round_key)
101+
}
102+
103+
/// Performs one round of an AES decryption flow on each 128-bit word (state) in `a` using
104+
/// the corresponding 128-bit word (key) in `round_key`.
105+
///
106+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesdec_epi128)
107+
#[inline]
108+
#[target_feature(enable = "avx512vaes,avx512f")]
109+
#[cfg_attr(test, assert_instr(vaesdec))]
110+
pub unsafe fn _mm512_aesdec_epi128(a: __m512i, round_key: __m512i) -> __m512i {
111+
aesdec_512(a, round_key)
112+
}
113+
114+
/// Performs the last round of an AES decryption flow on each 128-bit word (state) in `a` using
115+
/// the corresponding 128-bit word (key) in `round_key`.
116+
///
117+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesdeclast_epi128)
118+
#[inline]
119+
#[target_feature(enable = "avx512vaes,avx512f")]
120+
#[cfg_attr(test, assert_instr(vaesdeclast))]
121+
pub unsafe fn _mm512_aesdeclast_epi128(a: __m512i, round_key: __m512i) -> __m512i {
122+
aesdeclast_512(a, round_key)
123+
}
124+
125+
#[cfg(test)]
126+
mod tests {
127+
// The constants in the tests below are just bit patterns. They should not
128+
// be interpreted as integers; signedness does not make sense for them, but
129+
// __mXXXi happens to be defined in terms of signed integers.
130+
#![allow(overflowing_literals)]
131+
132+
use stdarch_test::simd_test;
133+
134+
use crate::core_arch::x86::*;
135+
136+
// the first parts of these tests are straight ports from the AES-NI tests
137+
// the second parts directly compare the two, for inputs that are different across lanes
138+
// and "more random" than the standard test vectors
139+
// ideally we'd be using quickcheck here instead
140+
141+
#[target_feature(enable = "avx512vaes,avx512vl")]
142+
unsafe fn helper_for_256_avx512vaes(linear : unsafe fn(__m128i,__m128i)->__m128i, vectorized : unsafe fn(__m256i,__m256i)->__m256i) {
143+
let a = _mm256_set_epi64x(
144+
0xDCB4DB3657BF0B7D, 0x18DB0601068EDD9F, 0xB76B908233200DC5, 0xE478235FA8E22D5E
145+
);
146+
let k = _mm256_set_epi64x(
147+
0x672F6F105A94CEA7, 0x8298B8FFCA5F829C, 0xA3927047B3FB61D8, 0x978093862CDE7187
148+
);
149+
let mut a_decomp = [_mm_setzero_si128();2];
150+
a_decomp[0] = _mm256_extracti128_si256(a,0);
151+
a_decomp[1] = _mm256_extracti128_si256(a,1);
152+
let mut k_decomp = [_mm_setzero_si128();2];
153+
k_decomp[0] = _mm256_extracti128_si256(k,0);
154+
k_decomp[1] = _mm256_extracti128_si256(k,1);
155+
let r = vectorized(a,k);
156+
let mut e_decomp = [_mm_setzero_si128();2];
157+
for i in 0..2 {
158+
e_decomp[i] = linear(a_decomp[i],k_decomp[i]);
159+
}
160+
assert_eq_m128i(_mm256_extracti128_si256(r,0),e_decomp[0]);
161+
assert_eq_m128i(_mm256_extracti128_si256(r,1),e_decomp[1]);
162+
}
163+
164+
#[simd_test(enable = "avx512vaes,avx512vl")]
165+
unsafe fn test_mm256_aesdec_epi128() {
166+
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
167+
let a = _mm256_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff);
168+
let k = _mm256_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee);
169+
let e = _mm256_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee,0x044e4f5176fec48f, 0xb57ecfa381da39ee);
170+
let r = _mm256_aesdec_epi128(a, k);
171+
assert_eq_m256i(r, e);
172+
173+
helper_for_256_avx512vaes(_mm_aesdec_si128,_mm256_aesdec_epi128);
174+
}
175+
176+
#[simd_test(enable = "avx512vaes,avx512vl")]
177+
unsafe fn test_mm256_aesdeclast_epi128() {
178+
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
179+
let a = _mm256_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff);
180+
let k = _mm256_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee);
181+
let e = _mm256_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493,0x36cad57d9072bf9e, 0xf210dd981fa4a493);
182+
let r = _mm256_aesdeclast_epi128(a, k);
183+
assert_eq_m256i(r, e);
184+
185+
helper_for_256_avx512vaes(_mm_aesdeclast_si128,_mm256_aesdeclast_epi128);
186+
}
187+
188+
#[simd_test(enable = "avx512vaes,avx512vl")]
189+
unsafe fn test_mm256_aesenc_epi128() {
190+
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
191+
// they are repeated appropriately
192+
let a = _mm256_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff, 0x0123456789abcdef, 0x8899aabbccddeeff);
193+
let k = _mm256_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee, 0x1133557799bbddff, 0x0022446688aaccee);
194+
let e = _mm256_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333, 0x16ab0e57dfc442ed, 0x28e4ee1884504333);
195+
let r = _mm256_aesenc_epi128(a, k);
196+
assert_eq_m256i(r, e);
197+
198+
helper_for_256_avx512vaes(_mm_aesenc_si128,_mm256_aesenc_epi128);
199+
}
200+
201+
#[simd_test(enable = "avx512vaes,avx512vl")]
202+
unsafe fn test_mm256_aesenclast_epi128() {
203+
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
204+
let a = _mm256_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff);
205+
let k = _mm256_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee);
206+
let e = _mm256_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8,0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8);
207+
let r = _mm256_aesenclast_epi128(a, k);
208+
assert_eq_m256i(r, e);
209+
210+
helper_for_256_avx512vaes(_mm_aesenclast_si128,_mm256_aesenclast_epi128);
211+
}
212+
213+
#[target_feature(enable = "avx512vaes,avx512f")]
214+
unsafe fn helper_for_512_avx512vaes(linear : unsafe fn(__m128i,__m128i)->__m128i, vectorized : unsafe fn(__m512i,__m512i)->__m512i) {
215+
let a = _mm512_set_epi64(
216+
0xDCB4DB3657BF0B7D, 0x18DB0601068EDD9F, 0xB76B908233200DC5, 0xE478235FA8E22D5E,
217+
0xAB05CFFA2621154C, 0x1171B47A186174C9, 0x8C6B6C0E7595CEC9, 0xBE3E7D4934E961BD
218+
);
219+
let k = _mm512_set_epi64(
220+
0x672F6F105A94CEA7, 0x8298B8FFCA5F829C, 0xA3927047B3FB61D8, 0x978093862CDE7187,
221+
0xB1927AB22F31D0EC, 0xA9A5DA619BE4D7AF, 0xCA2590F56884FDC6, 0x19BE9F660038BDB5
222+
);
223+
let mut a_decomp = [_mm_setzero_si128();4];
224+
a_decomp[0] = _mm512_extracti32x4_epi32(a,0);
225+
a_decomp[1] = _mm512_extracti32x4_epi32(a,1);
226+
a_decomp[2] = _mm512_extracti32x4_epi32(a,2);
227+
a_decomp[3] = _mm512_extracti32x4_epi32(a,3);
228+
let mut k_decomp = [_mm_setzero_si128();4];
229+
k_decomp[0] = _mm512_extracti32x4_epi32(k,0);
230+
k_decomp[1] = _mm512_extracti32x4_epi32(k,1);
231+
k_decomp[2] = _mm512_extracti32x4_epi32(k,2);
232+
k_decomp[3] = _mm512_extracti32x4_epi32(k,3);
233+
let r = vectorized(a,k);
234+
let mut e_decomp = [_mm_setzero_si128();4];
235+
for i in 0..4 {
236+
e_decomp[i] = linear(a_decomp[i],k_decomp[i]);
237+
}
238+
assert_eq_m128i(_mm512_extracti32x4_epi32(r,0),e_decomp[0]);
239+
assert_eq_m128i(_mm512_extracti32x4_epi32(r,1),e_decomp[1]);
240+
assert_eq_m128i(_mm512_extracti32x4_epi32(r,2),e_decomp[2]);
241+
assert_eq_m128i(_mm512_extracti32x4_epi32(r,3),e_decomp[3]);
242+
}
243+
244+
#[simd_test(enable = "avx512vaes,avx512f")]
245+
unsafe fn test_mm512_aesdec_epi128() {
246+
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
247+
let a = _mm512_set_epi64(
248+
0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff,
249+
0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff
250+
);
251+
let k = _mm512_set_epi64(
252+
0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee,
253+
0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee
254+
);
255+
let e = _mm512_set_epi64(
256+
0x044e4f5176fec48f, 0xb57ecfa381da39ee,0x044e4f5176fec48f, 0xb57ecfa381da39ee,
257+
0x044e4f5176fec48f, 0xb57ecfa381da39ee,0x044e4f5176fec48f, 0xb57ecfa381da39ee
258+
);
259+
let r = _mm512_aesdec_epi128(a, k);
260+
assert_eq_m512i(r, e);
261+
262+
helper_for_512_avx512vaes(_mm_aesdec_si128,_mm512_aesdec_epi128);
263+
}
264+
265+
#[simd_test(enable = "avx512vaes,avx512f")]
266+
unsafe fn test_mm512_aesdeclast_epi128() {
267+
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
268+
let a = _mm512_set_epi64(
269+
0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff,
270+
0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff
271+
);
272+
let k = _mm512_set_epi64(
273+
0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee,
274+
0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee
275+
);
276+
let e = _mm512_set_epi64(
277+
0x36cad57d9072bf9e, 0xf210dd981fa4a493,0x36cad57d9072bf9e, 0xf210dd981fa4a493,
278+
0x36cad57d9072bf9e, 0xf210dd981fa4a493,0x36cad57d9072bf9e, 0xf210dd981fa4a493
279+
);
280+
let r = _mm512_aesdeclast_epi128(a, k);
281+
assert_eq_m512i(r, e);
282+
283+
helper_for_512_avx512vaes(_mm_aesdeclast_si128,_mm512_aesdeclast_epi128);
284+
}
285+
286+
#[simd_test(enable = "avx512vaes,avx512f")]
287+
unsafe fn test_mm512_aesenc_epi128() {
288+
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
289+
// they are repeated appropriately
290+
let a = _mm512_set_epi64(
291+
0x0123456789abcdef, 0x8899aabbccddeeff, 0x0123456789abcdef, 0x8899aabbccddeeff,
292+
0x0123456789abcdef, 0x8899aabbccddeeff, 0x0123456789abcdef, 0x8899aabbccddeeff
293+
);
294+
let k = _mm512_set_epi64(
295+
0x1133557799bbddff, 0x0022446688aaccee, 0x1133557799bbddff, 0x0022446688aaccee,
296+
0x1133557799bbddff, 0x0022446688aaccee, 0x1133557799bbddff, 0x0022446688aaccee,
297+
);
298+
let e = _mm512_set_epi64(
299+
0x16ab0e57dfc442ed, 0x28e4ee1884504333, 0x16ab0e57dfc442ed, 0x28e4ee1884504333,
300+
0x16ab0e57dfc442ed, 0x28e4ee1884504333, 0x16ab0e57dfc442ed, 0x28e4ee1884504333
301+
);
302+
let r = _mm512_aesenc_epi128(a, k);
303+
assert_eq_m512i(r, e);
304+
305+
helper_for_512_avx512vaes(_mm_aesenc_si128,_mm512_aesenc_epi128);
306+
}
307+
308+
#[simd_test(enable = "avx512vaes,avx512f")]
309+
unsafe fn test_mm512_aesenclast_epi128() {
310+
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
311+
let a = _mm512_set_epi64(
312+
0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff,
313+
0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff,
314+
);
315+
let k = _mm512_set_epi64(
316+
0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee,
317+
0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee
318+
);
319+
let e = _mm512_set_epi64(
320+
0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8,0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8,
321+
0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8,0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8
322+
);
323+
let r = _mm512_aesenclast_epi128(a, k);
324+
assert_eq_m512i(r, e);
325+
326+
helper_for_512_avx512vaes(_mm_aesenclast_si128,_mm512_aesenclast_epi128);
327+
}
328+
}

crates/core_arch/src/x86/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,9 @@ pub use self::avx512f::*;
651651
mod avx512ifma;
652652
pub use self::avx512ifma::*;
653653

654+
mod avx512vaes;
655+
pub use self::avx512vaes::*;
656+
654657
mod bt;
655658
pub use self::bt::*;
656659

0 commit comments

Comments
 (0)