Skip to content

Commit 1b1f926

Browse files
committed
Support AVX10.2-BF16 new instructions.
1 parent 874890c commit 1b1f926

30 files changed

+26838
-9
lines changed

clang/include/clang/Basic/BuiltinsX86.def

Lines changed: 62 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2217,6 +2217,68 @@ TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs512_mask, "V16UiV16fV16UiUsIi", "nV:512
22172217
TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
22182218
TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
22192219
TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
2220+
2221+
// AVX10.2 BF16
2222+
TARGET_BUILTIN(__builtin_ia32_loadsbf16128_mask, "V8yV8yC*V8yUc", "nV:128:", "avx10.2-256")
2223+
TARGET_BUILTIN(__builtin_ia32_storesbf16128_mask, "vV8y*V8yUc", "nV:128:", "avx10.2-256")
2224+
TARGET_BUILTIN(__builtin_ia32_vaddnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
2225+
TARGET_BUILTIN(__builtin_ia32_vaddnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
2226+
TARGET_BUILTIN(__builtin_ia32_vaddnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
2227+
TARGET_BUILTIN(__builtin_ia32_vdivnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
2228+
TARGET_BUILTIN(__builtin_ia32_vdivnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
2229+
TARGET_BUILTIN(__builtin_ia32_vdivnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
2230+
TARGET_BUILTIN(__builtin_ia32_vmaxpbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
2231+
TARGET_BUILTIN(__builtin_ia32_vmaxpbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
2232+
TARGET_BUILTIN(__builtin_ia32_vmaxpbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
2233+
TARGET_BUILTIN(__builtin_ia32_vminpbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
2234+
TARGET_BUILTIN(__builtin_ia32_vminpbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
2235+
TARGET_BUILTIN(__builtin_ia32_vminpbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
2236+
TARGET_BUILTIN(__builtin_ia32_vmulnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
2237+
TARGET_BUILTIN(__builtin_ia32_vmulnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
2238+
TARGET_BUILTIN(__builtin_ia32_vmulnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
2239+
TARGET_BUILTIN(__builtin_ia32_vsubnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
2240+
TARGET_BUILTIN(__builtin_ia32_vsubnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
2241+
TARGET_BUILTIN(__builtin_ia32_vsubnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
2242+
TARGET_BUILTIN(__builtin_ia32_vcomsbf16eq, "iV8yV8y", "ncV:128:", "avx10.2-256")
2243+
TARGET_BUILTIN(__builtin_ia32_vcomsbf16lt, "iV8yV8y", "ncV:128:", "avx10.2-256")
2244+
TARGET_BUILTIN(__builtin_ia32_vcomsbf16neq, "iV8yV8y", "ncV:128:", "avx10.2-256")
2245+
TARGET_BUILTIN(__builtin_ia32_vcomsbf16ge, "iV8yV8y", "ncV:128:", "avx10.2-256")
2246+
TARGET_BUILTIN(__builtin_ia32_vcomsbf16gt, "iV8yV8y", "ncV:128:", "avx10.2-256")
2247+
TARGET_BUILTIN(__builtin_ia32_vcomsbf16le, "iV8yV8y", "ncV:128:", "avx10.2-256")
2248+
TARGET_BUILTIN(__builtin_ia32_vcmppbf16512_mask,"UiV32yV32yIiUi", "ncV:512:", "avx10.2-512")
2249+
TARGET_BUILTIN(__builtin_ia32_vcmppbf16256_mask,"UsV16yV16yIiUs", "ncV:256:", "avx10.2-256")
2250+
TARGET_BUILTIN(__builtin_ia32_vcmppbf16128_mask,"UcV8yV8yIiUc", "ncV:128:", "avx10.2-256")
2251+
TARGET_BUILTIN(__builtin_ia32_vfpclasspbf16128_mask, "UcV8yIiUc", "ncV:128:", "avx10.2-256")
2252+
TARGET_BUILTIN(__builtin_ia32_vfpclasspbf16256_mask, "UsV16yIiUs", "ncV:256:", "avx10.2-256")
2253+
TARGET_BUILTIN(__builtin_ia32_vfpclasspbf16512_mask, "UiV32yIiUi", "ncV:512:", "avx10.2-512")
2254+
TARGET_BUILTIN(__builtin_ia32_vscalefpbf16128_mask, "V8yV8yV8yV8yUc", "ncV:128:", "avx10.2-256")
2255+
TARGET_BUILTIN(__builtin_ia32_vscalefpbf16256_mask, "V16yV16yV16yV16yUs", "ncV:256:", "avx10.2-256")
2256+
TARGET_BUILTIN(__builtin_ia32_vscalefpbf16512_mask, "V32yV32yV32yV32yUi", "ncV:512:", "avx10.2-512")
2257+
TARGET_BUILTIN(__builtin_ia32_vrcppbf16128_mask, "V8yV8yV8yUc", "ncV:128:", "avx10.2-256")
2258+
TARGET_BUILTIN(__builtin_ia32_vrcppbf16256_mask, "V16yV16yV16yUs", "ncV:256:", "avx10.2-256")
2259+
TARGET_BUILTIN(__builtin_ia32_vrcppbf16512_mask, "V32yV32yV32yUi", "ncV:512:", "avx10.2-512")
2260+
TARGET_BUILTIN(__builtin_ia32_vgetexppbf16128_mask, "V8yV8yV8yUc", "ncV:128:", "avx10.2-256")
2261+
TARGET_BUILTIN(__builtin_ia32_vgetexppbf16256_mask, "V16yV16yV16yUs", "ncV:256:", "avx10.2-256")
2262+
TARGET_BUILTIN(__builtin_ia32_vgetexppbf16512_mask, "V32yV32yV32yUi", "ncV:512:", "avx10.2-512")
2263+
TARGET_BUILTIN(__builtin_ia32_vrsqrtpbf16128_mask, "V8yV8yV8yUc", "ncV:128:", "avx10.2-256")
2264+
TARGET_BUILTIN(__builtin_ia32_vrsqrtpbf16256_mask, "V16yV16yV16yUs", "ncV:256:", "avx10.2-256")
2265+
TARGET_BUILTIN(__builtin_ia32_vrsqrtpbf16512_mask, "V32yV32yV32yUi", "ncV:512:", "avx10.2-512")
2266+
TARGET_BUILTIN(__builtin_ia32_vreducenepbf16128_mask, "V8yV8yIiV8yUc", "ncV:128:", "avx10.2-256")
2267+
TARGET_BUILTIN(__builtin_ia32_vreducenepbf16256_mask, "V16yV16yIiV16yUs", "ncV:256:", "avx10.2-256")
2268+
TARGET_BUILTIN(__builtin_ia32_vreducenepbf16512_mask, "V32yV32yIiV32yUi", "ncV:512:", "avx10.2-512")
2269+
TARGET_BUILTIN(__builtin_ia32_vrndscalenepbf16_128_mask, "V8yV8yIiV8yUc", "ncV:128:", "avx10.2-256")
2270+
TARGET_BUILTIN(__builtin_ia32_vrndscalenepbf16_256_mask, "V16yV16yIiV16yUs", "ncV:256:", "avx10.2-256")
2271+
TARGET_BUILTIN(__builtin_ia32_vrndscalenepbf16_mask, "V32yV32yIiV32yUi", "ncV:512:", "avx10.2-512")
2272+
TARGET_BUILTIN(__builtin_ia32_vgetmantpbf16128_mask, "V8yV8yIiV8yUc", "ncV:128:", "avx10.2-256")
2273+
TARGET_BUILTIN(__builtin_ia32_vgetmantpbf16256_mask, "V16yV16yIiV16yUs", "ncV:256:", "avx10.2-256")
2274+
TARGET_BUILTIN(__builtin_ia32_vgetmantpbf16512_mask, "V32yV32yIiV32yUi", "ncV:512:", "avx10.2-512")
2275+
TARGET_BUILTIN(__builtin_ia32_vsqrtnepbf16, "V8yV8y", "ncV:128:", "avx10.2-256")
2276+
TARGET_BUILTIN(__builtin_ia32_vsqrtnepbf16256, "V16yV16y", "ncV:256:", "avx10.2-256")
2277+
TARGET_BUILTIN(__builtin_ia32_vsqrtnepbf16512, "V32yV32y", "ncV:512:", "avx10.2-512")
2278+
TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh512, "V32yV32yV32yV32y", "ncV:512:", "avx10.2-512")
2279+
TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh256, "V16yV16yV16yV16y", "ncV:256:", "avx10.2-256")
2280+
TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh128, "V8yV8yV8yV8y", "ncV:128:", "avx10.2-256")
2281+
22202282
#undef BUILTIN
22212283
#undef TARGET_BUILTIN
22222284
#undef TARGET_HEADER_BUILTIN

clang/lib/Basic/Targets/X86.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -306,6 +306,7 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
306306
HasAVX10_1_512 = true;
307307
} else if (Feature == "+avx10.2-256") {
308308
HasAVX10_2 = true;
309+
HasFullBFloat16 = true;
309310
} else if (Feature == "+avx10.2-512") {
310311
HasAVX10_2_512 = true;
311312
} else if (Feature == "+avx512cd") {

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14703,6 +14703,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1470314703
case X86::BI__builtin_ia32_storeups512_mask:
1470414704
return EmitX86MaskedStore(*this, Ops, Align(1));
1470514705

14706+
case X86::BI__builtin_ia32_storesbf16128_mask:
1470614707
case X86::BI__builtin_ia32_storesh128_mask:
1470714708
case X86::BI__builtin_ia32_storess128_mask:
1470814709
case X86::BI__builtin_ia32_storesd128_mask:
@@ -14811,6 +14812,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1481114812
case X86::BI__builtin_ia32_vfmaddph512_mask:
1481214813
case X86::BI__builtin_ia32_vfmaddph512_maskz:
1481314814
case X86::BI__builtin_ia32_vfmaddph512_mask3:
14815+
case X86::BI__builtin_ia32_vfmaddnepbh128:
14816+
case X86::BI__builtin_ia32_vfmaddnepbh256:
14817+
case X86::BI__builtin_ia32_vfmaddnepbh512:
1481414818
case X86::BI__builtin_ia32_vfmaddps512_mask:
1481514819
case X86::BI__builtin_ia32_vfmaddps512_maskz:
1481614820
case X86::BI__builtin_ia32_vfmaddps512_mask3:
@@ -14895,6 +14899,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1489514899
case X86::BI__builtin_ia32_loaddqudi512_mask:
1489614900
return EmitX86MaskedLoad(*this, Ops, Align(1));
1489714901

14902+
case X86::BI__builtin_ia32_loadsbf16128_mask:
1489814903
case X86::BI__builtin_ia32_loadsh128_mask:
1489914904
case X86::BI__builtin_ia32_loadss128_mask:
1490014905
case X86::BI__builtin_ia32_loadsd128_mask:
@@ -16049,6 +16054,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1604916054
case X86::BI__builtin_ia32_sqrtph256:
1605016055
case X86::BI__builtin_ia32_sqrtph:
1605116056
case X86::BI__builtin_ia32_sqrtph512:
16057+
case X86::BI__builtin_ia32_vsqrtnepbf16256:
16058+
case X86::BI__builtin_ia32_vsqrtnepbf16:
16059+
case X86::BI__builtin_ia32_vsqrtnepbf16512:
1605216060
case X86::BI__builtin_ia32_sqrtps512:
1605316061
case X86::BI__builtin_ia32_sqrtpd512: {
1605416062
if (Ops.size() == 2) {
@@ -16268,6 +16276,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1626816276
case X86::BI__builtin_ia32_fpclassps128_mask:
1626916277
case X86::BI__builtin_ia32_fpclassps256_mask:
1627016278
case X86::BI__builtin_ia32_fpclassps512_mask:
16279+
case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16280+
case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16281+
case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
1627116282
case X86::BI__builtin_ia32_fpclassph128_mask:
1627216283
case X86::BI__builtin_ia32_fpclassph256_mask:
1627316284
case X86::BI__builtin_ia32_fpclassph512_mask:
@@ -16282,6 +16293,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1628216293
Intrinsic::ID ID;
1628316294
switch (BuiltinID) {
1628416295
default: llvm_unreachable("Unsupported intrinsic!");
16296+
case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16297+
ID = Intrinsic::x86_avx10_fpclass_nepbf16_128;
16298+
break;
16299+
case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16300+
ID = Intrinsic::x86_avx10_fpclass_nepbf16_256;
16301+
break;
16302+
case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
16303+
ID = Intrinsic::x86_avx10_fpclass_nepbf16_512;
16304+
break;
1628516305
case X86::BI__builtin_ia32_fpclassph128_mask:
1628616306
ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
1628716307
break;
@@ -16440,6 +16460,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1644016460
case X86::BI__builtin_ia32_vcmppd256_round_mask:
1644116461
case X86::BI__builtin_ia32_vcmpps256_round_mask:
1644216462
case X86::BI__builtin_ia32_vcmpph256_round_mask:
16463+
case X86::BI__builtin_ia32_vcmppbf16512_mask:
16464+
case X86::BI__builtin_ia32_vcmppbf16256_mask:
16465+
case X86::BI__builtin_ia32_vcmppbf16128_mask:
1644316466
IsMaskFCmp = true;
1644416467
[[fallthrough]];
1644516468
case X86::BI__builtin_ia32_cmpps:

clang/lib/Headers/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -147,9 +147,11 @@ set(x86_files
147147
amxcomplexintrin.h
148148
amxfp16intrin.h
149149
amxintrin.h
150+
avx10_2_512bf16intrin.h
150151
avx10_2_512minmaxintrin.h
151152
avx10_2_512niintrin.h
152153
avx10_2_512satcvtintrin.h
154+
avx10_2bf16intrin.h
153155
avx10_2minmaxintrin.h
154156
avx10_2niintrin.h
155157
avx10_2satcvtintrin.h

0 commit comments

Comments
 (0)