Skip to content

Implement avx512bf16 intrinsics #998

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Feb 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/docker/i586-unknown-linux-gnu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ubuntu:18.04
FROM ubuntu:20.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc-multilib \
libc6-dev \
Expand Down
2 changes: 1 addition & 1 deletion ci/docker/i686-unknown-linux-gnu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ubuntu:18.04
FROM ubuntu:20.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc-multilib \
libc6-dev \
Expand Down
3 changes: 2 additions & 1 deletion ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ubuntu:18.04
FROM ubuntu:20.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
libc6-dev \
Expand All @@ -10,4 +10,5 @@ RUN apt-get update && apt-get install -y --no-install-recommends \

RUN wget https://github.com/gnzlbg/intel_sde/raw/master/sde-external-8.35.0-2019-03-11-lin.tar.bz2
RUN tar -xjf sde-external-8.35.0-2019-03-11-lin.tar.bz2
ENV SKIP_TESTS="avx512bf16"
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-8.35.0-2019-03-11-lin/sde64 -rtm_mode full --"
2 changes: 1 addition & 1 deletion ci/docker/x86_64-unknown-linux-gnu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ubuntu:18.04
FROM ubuntu:20.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
libc6-dev \
Expand Down
3 changes: 3 additions & 0 deletions ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ cargo_test() {
;;
esac

if [ "$SKIP_TESTS" != "" ]; then
cmd="$cmd --skip "$SKIP_TESTS
fi
$cmd
}

Expand Down
1,018 changes: 1,018 additions & 0 deletions crates/core_arch/src/x86/avx512bf16.rs

Large diffs are not rendered by default.

133 changes: 133 additions & 0 deletions crates/core_arch/src/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,37 @@ types! {
/// suffixed with "pd" (or otherwise contain "pd"). Not to be confused with
/// "ps" which is used for `__m512`.
pub struct __m512d(f64, f64, f64, f64, f64, f64, f64, f64);

/// 128-bit wide set of eight 'u16' types, x86-specific
///
/// This type is representing a 128-bit SIMD register which internally is consisted of
/// eight packed `u16` instances. It's purpose is for bf16 related intrinsic
/// implementations.
pub struct __m128bh(u16, u16, u16, u16, u16, u16, u16, u16);

/// 256-bit wide set of 16 'u16' types, x86-specific
///
/// This type is the same as the `__m128bh` type defined by Intel,
/// representing a 256-bit SIMD register which internally is consisted of
/// 16 packed `u16` instances. It's purpose is for bf16 related intrinsic
/// implementations.
pub struct __m256bh(
u16, u16, u16, u16, u16, u16, u16, u16,
u16, u16, u16, u16, u16, u16, u16, u16
);

/// 512-bit wide set of 32 'u16' types, x86-specific
///
/// This type is the same as the `__m128bh` type defined by Intel,
/// representing a 512-bit SIMD register which internally is consisted of
/// 32 packed `u16` instances. It's purpose is for bf16 related intrinsic
/// implementations.
pub struct __m512bh(
u16, u16, u16, u16, u16, u16, u16, u16,
u16, u16, u16, u16, u16, u16, u16, u16,
u16, u16, u16, u16, u16, u16, u16, u16,
u16, u16, u16, u16, u16, u16, u16, u16
);
}

/// The `__mmask64` type used in AVX-512 intrinsics, a 64-bit integer
Expand Down Expand Up @@ -602,6 +633,105 @@ impl m512dExt for __m512d {
}
}

#[allow(non_camel_case_types)]
#[unstable(feature = "stdsimd_internal", issue = "none")]
pub(crate) trait m128bhExt: Sized {
fn as_m128bh(self) -> __m128bh;

#[inline]
fn as_u16x8(self) -> crate::core_arch::simd::u16x8 {
unsafe { transmute(self.as_m128bh()) }
}

#[inline]
fn as_i16x8(self) -> crate::core_arch::simd::i16x8 {
unsafe { transmute(self.as_m128bh()) }
}

#[inline]
fn as_u32x4(self) -> crate::core_arch::simd::u32x4 {
unsafe { transmute(self.as_m128bh()) }
}

#[inline]
fn as_i32x4(self) -> crate::core_arch::simd::i32x4 {
unsafe { transmute(self.as_m128bh()) }
}
}

impl m128bhExt for __m128bh {
#[inline]
fn as_m128bh(self) -> Self {
self
}
}

#[allow(non_camel_case_types)]
#[unstable(feature = "stdsimd_internal", issue = "none")]
pub(crate) trait m256bhExt: Sized {
fn as_m256bh(self) -> __m256bh;

#[inline]
fn as_u16x16(self) -> crate::core_arch::simd::u16x16 {
unsafe { transmute(self.as_m256bh()) }
}

#[inline]
fn as_i16x16(self) -> crate::core_arch::simd::i16x16 {
unsafe { transmute(self.as_m256bh()) }
}

#[inline]
fn as_u32x8(self) -> crate::core_arch::simd::u32x8 {
unsafe { transmute(self.as_m256bh()) }
}

#[inline]
fn as_i32x8(self) -> crate::core_arch::simd::i32x8 {
unsafe { transmute(self.as_m256bh()) }
}
}

impl m256bhExt for __m256bh {
#[inline]
fn as_m256bh(self) -> Self {
self
}
}

#[allow(non_camel_case_types)]
#[unstable(feature = "stdsimd_internal", issue = "none")]
pub(crate) trait m512bhExt: Sized {
fn as_m512bh(self) -> __m512bh;

#[inline]
fn as_u16x32(self) -> crate::core_arch::simd::u16x32 {
unsafe { transmute(self.as_m512bh()) }
}

#[inline]
fn as_i16x32(self) -> crate::core_arch::simd::i16x32 {
unsafe { transmute(self.as_m512bh()) }
}

#[inline]
fn as_u32x16(self) -> crate::core_arch::simd::u32x16 {
unsafe { transmute(self.as_m512bh()) }
}

#[inline]
fn as_i32x16(self) -> crate::core_arch::simd::i32x16 {
unsafe { transmute(self.as_m512bh()) }
}
}

impl m512bhExt for __m512bh {
#[inline]
fn as_m512bh(self) -> Self {
self
}
}

mod eflags;
pub use self::eflags::*;

Expand Down Expand Up @@ -725,3 +855,6 @@ pub use self::rtm::*;

mod f16c;
pub use self::f16c::*;

mod avx512bf16;
pub use self::avx512bf16::*;
3 changes: 3 additions & 0 deletions crates/stdarch-verify/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,15 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
syn::Type::Path(ref p) => match extract_path_ident(&p.path).to_string().as_ref() {
// x86 ...
"__m128" => quote! { &M128 },
"__m128bh" => quote! { &M128BH },
"__m128d" => quote! { &M128D },
"__m128i" => quote! { &M128I },
"__m256" => quote! { &M256 },
"__m256bh" => quote! { &M256BH },
"__m256d" => quote! { &M256D },
"__m256i" => quote! { &M256I },
"__m512" => quote! { &M512 },
"__m512bh" => quote! { &M512BH },
"__m512d" => quote! { &M512D },
"__m512i" => quote! { &M512I },
"__mmask8" => quote! { &MMASK8 },
Expand Down
18 changes: 18 additions & 0 deletions crates/stdarch-verify/tests/x86-intel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,15 @@ static ORDERING: Type = Type::Ordering;

static M64: Type = Type::M64;
static M128: Type = Type::M128;
static M128BH: Type = Type::M128BH;
static M128I: Type = Type::M128I;
static M128D: Type = Type::M128D;
static M256: Type = Type::M256;
static M256BH: Type = Type::M256BH;
static M256I: Type = Type::M256I;
static M256D: Type = Type::M256D;
static M512: Type = Type::M512;
static M512BH: Type = Type::M512BH;
static M512I: Type = Type::M512I;
static M512D: Type = Type::M512D;
static MMASK8: Type = Type::MMASK8;
Expand All @@ -75,12 +78,15 @@ enum Type {
ConstPtr(&'static Type),
M64,
M128,
M128BH,
M128D,
M128I,
M256,
M256BH,
M256D,
M256I,
M512,
M512BH,
M512D,
M512I,
MMASK8,
Expand Down Expand Up @@ -493,6 +499,9 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
// The intrinsics guide calls `f16c` `fp16c` in disagreement with
// Intel's architecture manuals.
"fp16c" => String::from("f16c"),
"avx512_bf16" => String::from("avx512bf16"),
// The XML file names VNNI as "avx512_bf16", while Rust calls
// it "avx512bf16".
_ => cpuid,
};
let fixed_cpuid = fixup_cpuid(cpuid);
Expand Down Expand Up @@ -693,12 +702,15 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
(&Type::PrimUnsigned(8), "unsigned char") => {}
(&Type::M64, "__m64") => {}
(&Type::M128, "__m128") => {}
(&Type::M128BH, "__m128bh") => {}
(&Type::M128I, "__m128i") => {}
(&Type::M128D, "__m128d") => {}
(&Type::M256, "__m256") => {}
(&Type::M256BH, "__m256bh") => {}
(&Type::M256I, "__m256i") => {}
(&Type::M256D, "__m256d") => {}
(&Type::M512, "__m512") => {}
(&Type::M512BH, "__m512bh") => {}
(&Type::M512I, "__m512i") => {}
(&Type::M512D, "__m512d") => {}
(&Type::MMASK64, "__mmask64") => {}
Expand Down Expand Up @@ -726,12 +738,15 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
(&Type::MutPtr(&Type::PrimUnsigned(64)), "__mmask64*") => {}
(&Type::MutPtr(&Type::M64), "__m64*") => {}
(&Type::MutPtr(&Type::M128), "__m128*") => {}
(&Type::MutPtr(&Type::M128BH), "__m128bh*") => {}
(&Type::MutPtr(&Type::M128I), "__m128i*") => {}
(&Type::MutPtr(&Type::M128D), "__m128d*") => {}
(&Type::MutPtr(&Type::M256), "__m256*") => {}
(&Type::MutPtr(&Type::M256BH), "__m256bh*") => {}
(&Type::MutPtr(&Type::M256I), "__m256i*") => {}
(&Type::MutPtr(&Type::M256D), "__m256d*") => {}
(&Type::MutPtr(&Type::M512), "__m512*") => {}
(&Type::MutPtr(&Type::M512BH), "__m512bh*") => {}
(&Type::MutPtr(&Type::M512I), "__m512i*") => {}
(&Type::MutPtr(&Type::M512D), "__m512d*") => {}

Expand All @@ -754,12 +769,15 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
(&Type::ConstPtr(&Type::PrimUnsigned(32)), "void const*") => {}
(&Type::ConstPtr(&Type::M64), "__m64 const*") => {}
(&Type::ConstPtr(&Type::M128), "__m128 const*") => {}
(&Type::ConstPtr(&Type::M128BH), "__m128bh const*") => {}
(&Type::ConstPtr(&Type::M128I), "__m128i const*") => {}
(&Type::ConstPtr(&Type::M128D), "__m128d const*") => {}
(&Type::ConstPtr(&Type::M256), "__m256 const*") => {}
(&Type::ConstPtr(&Type::M256BH), "__m256bh const*") => {}
(&Type::ConstPtr(&Type::M256I), "__m256i const*") => {}
(&Type::ConstPtr(&Type::M256D), "__m256d const*") => {}
(&Type::ConstPtr(&Type::M512), "__m512 const*") => {}
(&Type::ConstPtr(&Type::M512BH), "__m512bh const*") => {}
(&Type::ConstPtr(&Type::M512I), "__m512i const*") => {}
(&Type::ConstPtr(&Type::M512D), "__m512d const*") => {}
(&Type::ConstPtr(&Type::PrimUnsigned(32)), "__mmask32*") => {}
Expand Down