-
Notifications
You must be signed in to change notification settings - Fork 292
Start adding some avx512 intrinsics #618
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
use coresimd::simd::*; | ||
use coresimd::x86::*; | ||
use mem; | ||
|
||
#[cfg(test)] | ||
use stdsimd_test::assert_instr; | ||
|
||
/// Computes the absolute values of packed 32-bit integers in `a`. | ||
/// | ||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32) | ||
#[inline] | ||
#[target_feature(enable = "avx512f")] | ||
#[cfg_attr(test, assert_instr(vpabsd))] | ||
pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i { | ||
mem::transmute(pabsd(a.as_i32x16(), _mm512_setzero_si512().as_i32x16(), -1)) | ||
} | ||
|
||
/// Compute the absolute value of packed 32-bit integers in `a`, and store the | ||
/// unsigned results in `dst` using writemask `k` (elements are copied from | ||
/// `src` when the corresponding mask bit is not set). | ||
/// | ||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32) | ||
#[inline] | ||
#[target_feature(enable = "avx512f")] | ||
#[cfg_attr(test, assert_instr(vpabsd))] | ||
pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { | ||
mem::transmute(pabsd(a.as_i32x16(), src.as_i32x16(), k)) | ||
} | ||
|
||
/// Compute the absolute value of packed 32-bit integers in `a`, and store the | ||
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when | ||
/// the corresponding mask bit is not set). | ||
/// | ||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33,34,35,35&text=_mm512_maskz_abs_epi32) | ||
#[inline] | ||
#[target_feature(enable = "avx512f")] | ||
#[cfg_attr(test, assert_instr(vpabsd))] | ||
pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i { | ||
mem::transmute(pabsd(a.as_i32x16(), _mm512_setzero_si512().as_i32x16(), k)) | ||
} | ||
|
||
/// Return vector of type `__m512i` with all elements set to zero. | ||
/// | ||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_si512) | ||
#[inline] | ||
#[target_feature(enable = "avx512f")] | ||
#[cfg_attr(test, assert_instr(vxorps))] | ||
pub unsafe fn _mm512_setzero_si512() -> __m512i { | ||
mem::zeroed() | ||
} | ||
|
||
/// Set packed 32-bit integers in `dst` with the supplied values in reverse | ||
/// order. | ||
#[inline] | ||
#[target_feature(enable = "avx512f")] | ||
pub unsafe fn _mm512_setr_epi32( | ||
e15: i32, | ||
e14: i32, | ||
e13: i32, | ||
e12: i32, | ||
e11: i32, | ||
e10: i32, | ||
e9: i32, | ||
e8: i32, | ||
e7: i32, | ||
e6: i32, | ||
e5: i32, | ||
e4: i32, | ||
e3: i32, | ||
e2: i32, | ||
e1: i32, | ||
e0: i32, | ||
) -> __m512i { | ||
let r = i32x16( | ||
e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0, | ||
); | ||
mem::transmute(r) | ||
} | ||
|
||
#[allow(improper_ctypes)] | ||
extern "C" { | ||
#[link_name = "llvm.x86.avx512.mask.pabs.d.512"] | ||
fn pabsd(a: i32x16, b: i32x16, c: i16) -> i32x16; | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use std; | ||
use stdsimd_test::simd_test; | ||
|
||
use coresimd::x86::*; | ||
|
||
#[simd_test(enable = "avx512f")] | ||
unsafe fn test_mm512_abs_epi32() { | ||
#[rustfmt::skip] | ||
let a = _mm512_setr_epi32( | ||
0, 1, -1, std::i32::MAX, | ||
std::i32::MIN, 100, -100, -32, | ||
0, 1, -1, std::i32::MAX, | ||
std::i32::MIN, 100, -100, -32, | ||
); | ||
let r = _mm512_abs_epi32(a); | ||
let e = _mm512_setr_epi32( | ||
0, | ||
1, | ||
1, | ||
std::i32::MAX, | ||
std::i32::MAX.wrapping_add(1), | ||
100, | ||
100, | ||
32, | ||
0, | ||
1, | ||
1, | ||
std::i32::MAX, | ||
std::i32::MAX.wrapping_add(1), | ||
100, | ||
100, | ||
32, | ||
); | ||
assert_eq_m512i(r, e); | ||
} | ||
|
||
#[simd_test(enable = "avx512f")] | ||
unsafe fn test_mm512_mask_abs_epi32() { | ||
#[rustfmt::skip] | ||
let a = _mm512_setr_epi32( | ||
0, 1, -1, std::i32::MAX, | ||
std::i32::MIN, 100, -100, -32, | ||
0, 1, -1, std::i32::MAX, | ||
std::i32::MIN, 100, -100, -32, | ||
); | ||
let r = _mm512_mask_abs_epi32(a, 0, a); | ||
assert_eq_m512i(r, a); | ||
let r = _mm512_mask_abs_epi32(a, 0b11111111, a); | ||
let e = _mm512_setr_epi32( | ||
0, | ||
1, | ||
1, | ||
std::i32::MAX, | ||
std::i32::MAX.wrapping_add(1), | ||
100, | ||
100, | ||
32, | ||
0, | ||
1, | ||
-1, | ||
std::i32::MAX, | ||
std::i32::MIN, | ||
100, | ||
-100, | ||
-32, | ||
); | ||
assert_eq_m512i(r, e); | ||
} | ||
|
||
#[simd_test(enable = "avx512f")] | ||
unsafe fn test_mm512_maskz_abs_epi32() { | ||
#[rustfmt::skip] | ||
let a = _mm512_setr_epi32( | ||
0, 1, -1, std::i32::MAX, | ||
std::i32::MIN, 100, -100, -32, | ||
0, 1, -1, std::i32::MAX, | ||
std::i32::MIN, 100, -100, -32, | ||
); | ||
let r = _mm512_maskz_abs_epi32(0, a); | ||
assert_eq_m512i(r, _mm512_setzero_si512()); | ||
let r = _mm512_maskz_abs_epi32(0b11111111, a); | ||
let e = _mm512_setr_epi32( | ||
0, | ||
1, | ||
1, | ||
std::i32::MAX, | ||
std::i32::MAX.wrapping_add(1), | ||
100, | ||
100, | ||
32, | ||
0, | ||
0, | ||
0, | ||
0, | ||
0, | ||
0, | ||
0, | ||
0, | ||
); | ||
assert_eq_m512i(r, e); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -391,6 +391,10 @@ types! { | |
pub struct __m512d(f64, f64, f64, f64, f64, f64, f64, f64); | ||
} | ||
|
||
/// The `__mmask16` type used in AVX-512 intrinsics, a 16-bit integer | ||
#[allow(non_camel_case_types)] | ||
pub type __mmask16 = i16; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FWIW this is definitely something we're going to want to debate/decide before stabilization. I've never used this type before (or these intrinsics) so this is the most naive way to translate it, but other options include:
I'm not sure what the best option is, but others I'm sure to! |
||
|
||
#[cfg(test)] | ||
mod test; | ||
#[cfg(test)] | ||
|
@@ -502,6 +506,24 @@ impl m256iExt for __m256i { | |
} | ||
} | ||
|
||
#[allow(non_camel_case_types)] | ||
#[unstable(feature = "stdimd_internal", issue = "0")] | ||
pub(crate) trait m512iExt: Sized { | ||
fn as_m512i(self) -> __m512i; | ||
|
||
#[inline] | ||
fn as_i32x16(self) -> ::coresimd::simd::i32x16 { | ||
unsafe { mem::transmute(self.as_m512i()) } | ||
} | ||
} | ||
|
||
impl m512iExt for __m512i { | ||
#[inline] | ||
fn as_m512i(self) -> Self { | ||
self | ||
} | ||
} | ||
|
||
mod eflags; | ||
pub use self::eflags::*; | ||
|
||
|
@@ -580,3 +602,6 @@ use stdsimd_test::assert_instr; | |
pub unsafe fn ud2() -> ! { | ||
::intrinsics::abort() | ||
} | ||
|
||
mod avx512f; | ||
pub use self::avx512f::*; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Weird, why is this needed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh it wasn't needed per se, but it made development a lot easier as it doesn't regress any codegen and it makes release builds (assert_instr) much faster