rust-lang · alexcrichton · Dec 14, 2018 · Dec 13, 2018 · gnzlbg · Dec 14, 2018
diff --git a/.appveyor.yml b/.appveyor.yml
@@ -4,6 +4,9 @@ environment:
   # default so pass a flag to disable it to ensure our tests work ok.
   RUSTFLAGS: -Clink-args=/OPT:NOICF
 
+  # VS2017 looks to be the first with avx-512 support, notably in dumpbin
+  APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
+
   matrix:
   - TARGET: x86_64-pc-windows-msvc
 

diff --git a/Cargo.toml b/Cargo.toml
@@ -10,7 +10,9 @@ exclude = [
 [profile.release]
 debug = true
 opt-level = 3
+incremental = true
 
 [profile.bench]
 debug = 1
 opt-level = 3
+incremental = true
diff --git a/coresimd/simd.rs b/coresimd/simd.rs
@@ -181,3 +181,11 @@ simd_ty!(i32x8[i32]:
          i32, i32, i32, i32, i32, i32, i32, i32
          | x0, x1, x2, x3, x4, x5, x6, x7);
 simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3);
+
+// 512-bit wide types:
+
+simd_ty!(i32x16[i32]:
+         i32, i32, i32, i32, i32, i32, i32, i32,
+         i32, i32, i32, i32, i32, i32, i32, i32
+         | x0, x1, x2, x3, x4, x5, x6, x7,
+         x8, x9, x10, x11, x12, x13, x14, x15);
diff --git a/coresimd/x86/avx512f.rs b/coresimd/x86/avx512f.rs
@@ -0,0 +1,189 @@
+use coresimd::simd::*;
+use coresimd::x86::*;
+use mem;
+
+#[cfg(test)]
+use stdsimd_test::assert_instr;
+
+/// Computes the absolute values of packed 32-bit integers in `a`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpabsd))]
+pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
+    mem::transmute(pabsd(a.as_i32x16(), _mm512_setzero_si512().as_i32x16(), -1))
+}
+
+/// Compute the absolute value of packed 32-bit integers in `a`, and store the
+/// unsigned results in `dst` using writemask `k` (elements are copied from
+/// `src` when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpabsd))]
+pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
+    mem::transmute(pabsd(a.as_i32x16(), src.as_i32x16(), k))
+}
+
+/// Compute the absolute value of packed 32-bit integers in `a`, and store the
+/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
+/// the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33,34,35,35&text=_mm512_maskz_abs_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpabsd))]
+pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
+    mem::transmute(pabsd(a.as_i32x16(), _mm512_setzero_si512().as_i32x16(), k))
+}
+
+/// Return vector of type `__m512i` with all elements set to zero.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_si512)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vxorps))]
+pub unsafe fn _mm512_setzero_si512() -> __m512i {
+    mem::zeroed()
+}
+
+/// Set packed 32-bit integers in `dst` with the supplied values in reverse
+/// order.
+#[inline]
+#[target_feature(enable = "avx512f")]
+pub unsafe fn _mm512_setr_epi32(
+    e15: i32,
+    e14: i32,
+    e13: i32,
+    e12: i32,
+    e11: i32,
+    e10: i32,
+    e9: i32,
+    e8: i32,
+    e7: i32,
+    e6: i32,
+    e5: i32,
+    e4: i32,
+    e3: i32,
+    e2: i32,
+    e1: i32,
+    e0: i32,
+) -> __m512i {
+    let r = i32x16(
+        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
+    );
+    mem::transmute(r)
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.x86.avx512.mask.pabs.d.512"]
+    fn pabsd(a: i32x16, b: i32x16, c: i16) -> i32x16;
+}
+
+#[cfg(test)]
+mod tests {
+    use std;
+    use stdsimd_test::simd_test;
+
+    use coresimd::x86::*;
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_abs_epi32() {
+        #[rustfmt::skip]
+        let a = _mm512_setr_epi32(
+            0, 1, -1, std::i32::MAX,
+            std::i32::MIN, 100, -100, -32,
+            0, 1, -1, std::i32::MAX,
+            std::i32::MIN, 100, -100, -32,
+        );
+        let r = _mm512_abs_epi32(a);
+        let e = _mm512_setr_epi32(
+            0,
+            1,
+            1,
+            std::i32::MAX,
+            std::i32::MAX.wrapping_add(1),
+            100,
+            100,
+            32,
+            0,
+            1,
+            1,
+            std::i32::MAX,
+            std::i32::MAX.wrapping_add(1),
+            100,
+            100,
+            32,
+        );
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_abs_epi32() {
+        #[rustfmt::skip]
+        let a = _mm512_setr_epi32(
+            0, 1, -1, std::i32::MAX,
+            std::i32::MIN, 100, -100, -32,
+            0, 1, -1, std::i32::MAX,
+            std::i32::MIN, 100, -100, -32,
+        );
+        let r = _mm512_mask_abs_epi32(a, 0, a);
+        assert_eq_m512i(r, a);
+        let r = _mm512_mask_abs_epi32(a, 0b11111111, a);
+        let e = _mm512_setr_epi32(
+            0,
+            1,
+            1,
+            std::i32::MAX,
+            std::i32::MAX.wrapping_add(1),
+            100,
+            100,
+            32,
+            0,
+            1,
+            -1,
+            std::i32::MAX,
+            std::i32::MIN,
+            100,
+            -100,
+            -32,
+        );
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_abs_epi32() {
+        #[rustfmt::skip]
+        let a = _mm512_setr_epi32(
+            0, 1, -1, std::i32::MAX,
+            std::i32::MIN, 100, -100, -32,
+            0, 1, -1, std::i32::MAX,
+            std::i32::MIN, 100, -100, -32,
+        );
+        let r = _mm512_maskz_abs_epi32(0, a);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+        let r = _mm512_maskz_abs_epi32(0b11111111, a);
+        let e = _mm512_setr_epi32(
+            0,
+            1,
+            1,
+            std::i32::MAX,
+            std::i32::MAX.wrapping_add(1),
+            100,
+            100,
+            32,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+        );
+        assert_eq_m512i(r, e);
+    }
+}
diff --git a/coresimd/x86/mod.rs b/coresimd/x86/mod.rs
@@ -391,6 +391,10 @@ types! {
     pub struct __m512d(f64, f64, f64, f64, f64, f64, f64, f64);
 }
 
+/// The `__mmask16` type used in AVX-512 intrinsics, a 16-bit integer
+#[allow(non_camel_case_types)]
+pub type __mmask16 = i16;
+
 #[cfg(test)]
 mod test;
 #[cfg(test)]
@@ -502,6 +506,24 @@ impl m256iExt for __m256i {
     }
 }
 
+#[allow(non_camel_case_types)]
+#[unstable(feature = "stdimd_internal", issue = "0")]
+pub(crate) trait m512iExt: Sized {
+    fn as_m512i(self) -> __m512i;
+
+    #[inline]
+    fn as_i32x16(self) -> ::coresimd::simd::i32x16 {
+        unsafe { mem::transmute(self.as_m512i()) }
+    }
+}
+
+impl m512iExt for __m512i {
+    #[inline]
+    fn as_m512i(self) -> Self {
+        self
+    }
+}
+
 mod eflags;
 pub use self::eflags::*;
 
@@ -580,3 +602,6 @@ use stdsimd_test::assert_instr;
 pub unsafe fn ud2() -> ! {
     ::intrinsics::abort()
 }
+
+mod avx512f;
+pub use self::avx512f::*;
diff --git a/coresimd/x86/test.rs b/coresimd/x86/test.rs
@@ -135,3 +135,11 @@ mod x86_polyfill {
     pub use coresimd::x86_64::{_mm256_insert_epi64, _mm_insert_epi64};
 }
 pub use self::x86_polyfill::*;
+
+pub unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) {
+    union A {
+        a: __m512i,
+        b: [i32; 16],
+    }
+    assert_eq!(A { a }.b, A { a: b }.b)
+}
diff --git a/crates/coresimd/src/lib.rs b/crates/coresimd/src/lib.rs
@@ -33,6 +33,7 @@
     sse4a_target_feature,
     arm_target_feature,
     aarch64_target_feature,
+    avx512_target_feature,
     mips_target_feature,
     powerpc_target_feature
 )]

diff --git a/crates/stdsimd-verify/src/lib.rs b/crates/stdsimd-verify/src/lib.rs
@@ -98,6 +98,10 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
             "__m256" => quote! { &M256 },
             "__m256d" => quote! { &M256D },
             "__m256i" => quote! { &M256I },
+            "__m512" => quote! { &M512 },
+            "__m512d" => quote! { &M512D },
+            "__m512i" => quote! { &M512I },
+            "__mmask16" => quote! { &MMASK16 },
             "__m64" => quote! { &M64 },
             "bool" => quote! { &BOOL },
             "f32" => quote! { &F32 },

diff --git a/crates/stdsimd-verify/tests/x86-intel.rs b/crates/stdsimd-verify/tests/x86-intel.rs
@@ -54,6 +54,10 @@ static M128D: Type = Type::M128D;
 static M256: Type = Type::M256;
 static M256I: Type = Type::M256I;
 static M256D: Type = Type::M256D;
+static M512: Type = Type::M512;
+static M512I: Type = Type::M512I;
+static M512D: Type = Type::M512D;
+static MMASK16: Type = Type::MMASK16;
 
 static TUPLE: Type = Type::Tuple;
 static CPUID: Type = Type::CpuidResult;
@@ -72,6 +76,10 @@ enum Type {
     M256,
     M256D,
     M256I,
+    M512,
+    M512D,
+    M512I,
+    MMASK16,
     Tuple,
     CpuidResult,
     Never,
@@ -422,6 +430,15 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
         | (&Type::M256, "__m256")
         | (&Type::Ptr(&Type::M256), "__m256*") => {}
 
+        (&Type::M512I, "__m512i")
+        | (&Type::Ptr(&Type::M512I), "__m512i*")
+        | (&Type::M512D, "__m512d")
+        | (&Type::Ptr(&Type::M512D), "__m512d*")
+        | (&Type::M512, "__m512")
+        | (&Type::Ptr(&Type::M512), "__m512*") => {}
+
+        (&Type::MMASK16, "__mmask16") => {}
+
         // This is a macro (?) in C which seems to mutate its arguments, but
         // that means that we're taking pointers to arguments in rust
         // as we're not exposing it as a macro.