Skip to content

Start adding some avx512 intrinsics #618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ environment:
# default so pass a flag to disable it to ensure our tests work ok.
RUSTFLAGS: -Clink-args=/OPT:NOICF

# VS2017 looks to be the first with avx-512 support, notably in dumpbin
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017

matrix:
- TARGET: x86_64-pc-windows-msvc

Expand Down
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ exclude = [
[profile.release]
debug = true
opt-level = 3
incremental = true
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Weird, why is this needed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh it wasn't needed per se, but it made development a lot easier as it doesn't regress any codegen and it makes release builds (assert_instr) much faster


[profile.bench]
debug = 1
opt-level = 3
incremental = true
8 changes: 8 additions & 0 deletions coresimd/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,11 @@ simd_ty!(i32x8[i32]:
i32, i32, i32, i32, i32, i32, i32, i32
| x0, x1, x2, x3, x4, x5, x6, x7);
simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3);

// 512-bit wide types:

simd_ty!(i32x16[i32]:
i32, i32, i32, i32, i32, i32, i32, i32,
i32, i32, i32, i32, i32, i32, i32, i32
| x0, x1, x2, x3, x4, x5, x6, x7,
x8, x9, x10, x11, x12, x13, x14, x15);
189 changes: 189 additions & 0 deletions coresimd/x86/avx512f.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
use coresimd::simd::*;
use coresimd::x86::*;
use mem;

#[cfg(test)]
use stdsimd_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
mem::transmute(pabsd(a.as_i32x16(), _mm512_setzero_si512().as_i32x16(), -1))
}

/// Compute the absolute value of packed 32-bit integers in `a`, and store the
/// unsigned results in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
mem::transmute(pabsd(a.as_i32x16(), src.as_i32x16(), k))
}

/// Compute the absolute value of packed 32-bit integers in `a`, and store the
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33,34,35,35&text=_mm512_maskz_abs_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
mem::transmute(pabsd(a.as_i32x16(), _mm512_setzero_si512().as_i32x16(), k))
}

/// Return vector of type `__m512i` with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_si512)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero_si512() -> __m512i {
mem::zeroed()
}

/// Set packed 32-bit integers in `dst` with the supplied values in reverse
/// order.
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr_epi32(
e15: i32,
e14: i32,
e13: i32,
e12: i32,
e11: i32,
e10: i32,
e9: i32,
e8: i32,
e7: i32,
e6: i32,
e5: i32,
e4: i32,
e3: i32,
e2: i32,
e1: i32,
e0: i32,
) -> __m512i {
let r = i32x16(
e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
);
mem::transmute(r)
}

#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.avx512.mask.pabs.d.512"]
fn pabsd(a: i32x16, b: i32x16, c: i16) -> i32x16;
}

#[cfg(test)]
mod tests {
use std;
use stdsimd_test::simd_test;

use coresimd::x86::*;

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_abs_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, std::i32::MAX,
std::i32::MIN, 100, -100, -32,
0, 1, -1, std::i32::MAX,
std::i32::MIN, 100, -100, -32,
);
let r = _mm512_abs_epi32(a);
let e = _mm512_setr_epi32(
0,
1,
1,
std::i32::MAX,
std::i32::MAX.wrapping_add(1),
100,
100,
32,
0,
1,
1,
std::i32::MAX,
std::i32::MAX.wrapping_add(1),
100,
100,
32,
);
assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_abs_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, std::i32::MAX,
std::i32::MIN, 100, -100, -32,
0, 1, -1, std::i32::MAX,
std::i32::MIN, 100, -100, -32,
);
let r = _mm512_mask_abs_epi32(a, 0, a);
assert_eq_m512i(r, a);
let r = _mm512_mask_abs_epi32(a, 0b11111111, a);
let e = _mm512_setr_epi32(
0,
1,
1,
std::i32::MAX,
std::i32::MAX.wrapping_add(1),
100,
100,
32,
0,
1,
-1,
std::i32::MAX,
std::i32::MIN,
100,
-100,
-32,
);
assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_abs_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, std::i32::MAX,
std::i32::MIN, 100, -100, -32,
0, 1, -1, std::i32::MAX,
std::i32::MIN, 100, -100, -32,
);
let r = _mm512_maskz_abs_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_abs_epi32(0b11111111, a);
let e = _mm512_setr_epi32(
0,
1,
1,
std::i32::MAX,
std::i32::MAX.wrapping_add(1),
100,
100,
32,
0,
0,
0,
0,
0,
0,
0,
0,
);
assert_eq_m512i(r, e);
}
}
25 changes: 25 additions & 0 deletions coresimd/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,10 @@ types! {
pub struct __m512d(f64, f64, f64, f64, f64, f64, f64, f64);
}

/// The `__mmask16` type used in AVX-512 intrinsics, a 16-bit integer
#[allow(non_camel_case_types)]
pub type __mmask16 = i16;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW this is definitely something we're going to want to debate/decide before stabilization. I've never used this type before (or these intrinsics) so this is the most naive way to translate it, but other options include:

  • A newtype struct wrapper
  • An unsigned number

I'm not sure what the best option is, but others I'm sure to!


#[cfg(test)]
mod test;
#[cfg(test)]
Expand Down Expand Up @@ -502,6 +506,24 @@ impl m256iExt for __m256i {
}
}

#[allow(non_camel_case_types)]
#[unstable(feature = "stdimd_internal", issue = "0")]
pub(crate) trait m512iExt: Sized {
fn as_m512i(self) -> __m512i;

#[inline]
fn as_i32x16(self) -> ::coresimd::simd::i32x16 {
unsafe { mem::transmute(self.as_m512i()) }
}
}

impl m512iExt for __m512i {
#[inline]
fn as_m512i(self) -> Self {
self
}
}

mod eflags;
pub use self::eflags::*;

Expand Down Expand Up @@ -580,3 +602,6 @@ use stdsimd_test::assert_instr;
pub unsafe fn ud2() -> ! {
::intrinsics::abort()
}

mod avx512f;
pub use self::avx512f::*;
8 changes: 8 additions & 0 deletions coresimd/x86/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,11 @@ mod x86_polyfill {
pub use coresimd::x86_64::{_mm256_insert_epi64, _mm_insert_epi64};
}
pub use self::x86_polyfill::*;

pub unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) {
union A {
a: __m512i,
b: [i32; 16],
}
assert_eq!(A { a }.b, A { a: b }.b)
}
1 change: 1 addition & 0 deletions crates/coresimd/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
sse4a_target_feature,
arm_target_feature,
aarch64_target_feature,
avx512_target_feature,
mips_target_feature,
powerpc_target_feature
)]
Expand Down
4 changes: 4 additions & 0 deletions crates/stdsimd-verify/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
"__m256" => quote! { &M256 },
"__m256d" => quote! { &M256D },
"__m256i" => quote! { &M256I },
"__m512" => quote! { &M512 },
"__m512d" => quote! { &M512D },
"__m512i" => quote! { &M512I },
"__mmask16" => quote! { &MMASK16 },
"__m64" => quote! { &M64 },
"bool" => quote! { &BOOL },
"f32" => quote! { &F32 },
Expand Down
17 changes: 17 additions & 0 deletions crates/stdsimd-verify/tests/x86-intel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ static M128D: Type = Type::M128D;
static M256: Type = Type::M256;
static M256I: Type = Type::M256I;
static M256D: Type = Type::M256D;
static M512: Type = Type::M512;
static M512I: Type = Type::M512I;
static M512D: Type = Type::M512D;
static MMASK16: Type = Type::MMASK16;

static TUPLE: Type = Type::Tuple;
static CPUID: Type = Type::CpuidResult;
Expand All @@ -72,6 +76,10 @@ enum Type {
M256,
M256D,
M256I,
M512,
M512D,
M512I,
MMASK16,
Tuple,
CpuidResult,
Never,
Expand Down Expand Up @@ -422,6 +430,15 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
| (&Type::M256, "__m256")
| (&Type::Ptr(&Type::M256), "__m256*") => {}

(&Type::M512I, "__m512i")
| (&Type::Ptr(&Type::M512I), "__m512i*")
| (&Type::M512D, "__m512d")
| (&Type::Ptr(&Type::M512D), "__m512d*")
| (&Type::M512, "__m512")
| (&Type::Ptr(&Type::M512), "__m512*") => {}

(&Type::MMASK16, "__mmask16") => {}

// This is a macro (?) in C which seems to mutate its arguments, but
// that means that we're taking pointers to arguments in rust
// as we're not exposing it as a macro.
Expand Down