Skip to content

Commit f89b746

Browse files
committed
Support AVX10.2-BF16 new instructions.
1 parent bd62a35 commit f89b746

31 files changed

+26814
-11
lines changed

clang/include/clang/Basic/BuiltinsX86.def

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2022,6 +2022,66 @@ TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
20222022
TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4")
20232023
TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
20242024

2025+
// AVX10.2-BF16 builtins added by this change.
//
// Within this group the last argument (required target feature) follows one
// rule: every entry whose attribute string carries V:128: or V:256: requires
// "avx10.2-256", and every entry carrying V:512: requires "avx10.2-512".
//
// NOTE(review): in the type strings 'y' is presumably __bf16 and "VNy" a
// vector of N such elements, with the return type listed first; "V:N:" in the
// attribute string is presumably the required vector width. Confirm against
// the encoding documented in clang's Builtins.def.
TARGET_BUILTIN(__builtin_ia32_loadsbf16128_mask, "V8yV8yC*V8yUc", "nV:128:", "avx10.2-256")
2026+
TARGET_BUILTIN(__builtin_ia32_storesbf16128_mask, "vV8y*V8yUc", "nV:128:", "avx10.2-256")
2027+
TARGET_BUILTIN(__builtin_ia32_vaddnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
2028+
TARGET_BUILTIN(__builtin_ia32_vaddnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
2029+
TARGET_BUILTIN(__builtin_ia32_vaddnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
2030+
TARGET_BUILTIN(__builtin_ia32_vdivnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
2031+
TARGET_BUILTIN(__builtin_ia32_vdivnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
2032+
TARGET_BUILTIN(__builtin_ia32_vdivnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
2033+
TARGET_BUILTIN(__builtin_ia32_vmaxpbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
2034+
TARGET_BUILTIN(__builtin_ia32_vmaxpbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
2035+
TARGET_BUILTIN(__builtin_ia32_vmaxpbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
2036+
TARGET_BUILTIN(__builtin_ia32_vminpbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
2037+
TARGET_BUILTIN(__builtin_ia32_vminpbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
2038+
TARGET_BUILTIN(__builtin_ia32_vminpbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
2039+
TARGET_BUILTIN(__builtin_ia32_vmulnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
2040+
TARGET_BUILTIN(__builtin_ia32_vmulnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
2041+
TARGET_BUILTIN(__builtin_ia32_vmulnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
2042+
TARGET_BUILTIN(__builtin_ia32_vsubnepbf16128, "V8yV8yV8y", "ncV:128:", "avx10.2-256")
2043+
TARGET_BUILTIN(__builtin_ia32_vsubnepbf16256, "V16yV16yV16y", "ncV:256:", "avx10.2-256")
2044+
TARGET_BUILTIN(__builtin_ia32_vsubnepbf16512, "V32yV32yV32y", "ncV:512:", "avx10.2-512")
2045+
TARGET_BUILTIN(__builtin_ia32_vcomsbf16eq, "iV8yV8y", "ncV:128:", "avx10.2-256")
2046+
TARGET_BUILTIN(__builtin_ia32_vcomsbf16lt, "iV8yV8y", "ncV:128:", "avx10.2-256")
2047+
TARGET_BUILTIN(__builtin_ia32_vcomsbf16neq, "iV8yV8y", "ncV:128:", "avx10.2-256")
2048+
TARGET_BUILTIN(__builtin_ia32_vcomsbf16ge, "iV8yV8y", "ncV:128:", "avx10.2-256")
2049+
TARGET_BUILTIN(__builtin_ia32_vcomsbf16gt, "iV8yV8y", "ncV:128:", "avx10.2-256")
2050+
TARGET_BUILTIN(__builtin_ia32_vcomsbf16le, "iV8yV8y", "ncV:128:", "avx10.2-256")
2051+
TARGET_BUILTIN(__builtin_ia32_vcmppbf16512_mask,"UiV32yV32yIiUi", "ncV:512:", "avx10.2-512")
2052+
TARGET_BUILTIN(__builtin_ia32_vcmppbf16256_mask,"UsV16yV16yIiUs", "ncV:256:", "avx10.2-256")
2053+
TARGET_BUILTIN(__builtin_ia32_vcmppbf16128_mask,"UcV8yV8yIiUc", "ncV:128:", "avx10.2-256")
2054+
TARGET_BUILTIN(__builtin_ia32_vfpclasspbf16128_mask, "UcV8yIiUc", "ncV:128:", "avx10.2-256")
2055+
TARGET_BUILTIN(__builtin_ia32_vfpclasspbf16256_mask, "UsV16yIiUs", "ncV:256:", "avx10.2-256")
2056+
TARGET_BUILTIN(__builtin_ia32_vfpclasspbf16512_mask, "UiV32yIiUi", "ncV:512:", "avx10.2-512")
2057+
TARGET_BUILTIN(__builtin_ia32_vscalefpbf16128_mask, "V8yV8yV8yV8yUc", "ncV:128:", "avx10.2-256")
2058+
TARGET_BUILTIN(__builtin_ia32_vscalefpbf16256_mask, "V16yV16yV16yV16yUs", "ncV:256:", "avx10.2-256")
2059+
TARGET_BUILTIN(__builtin_ia32_vscalefpbf16512_mask, "V32yV32yV32yV32yUi", "ncV:512:", "avx10.2-512")
2060+
TARGET_BUILTIN(__builtin_ia32_vrcppbf16128_mask, "V8yV8yV8yUc", "ncV:128:", "avx10.2-256")
2061+
TARGET_BUILTIN(__builtin_ia32_vrcppbf16256_mask, "V16yV16yV16yUs", "ncV:256:", "avx10.2-256")
2062+
TARGET_BUILTIN(__builtin_ia32_vrcppbf16512_mask, "V32yV32yV32yUi", "ncV:512:", "avx10.2-512")
2063+
TARGET_BUILTIN(__builtin_ia32_vgetexppbf16128_mask, "V8yV8yV8yUc", "ncV:128:", "avx10.2-256")
2064+
TARGET_BUILTIN(__builtin_ia32_vgetexppbf16256_mask, "V16yV16yV16yUs", "ncV:256:", "avx10.2-256")
2065+
TARGET_BUILTIN(__builtin_ia32_vgetexppbf16512_mask, "V32yV32yV32yUi", "ncV:512:", "avx10.2-512")
2066+
TARGET_BUILTIN(__builtin_ia32_vrsqrtpbf16128_mask, "V8yV8yV8yUc", "ncV:128:", "avx10.2-256")
2067+
TARGET_BUILTIN(__builtin_ia32_vrsqrtpbf16256_mask, "V16yV16yV16yUs", "ncV:256:", "avx10.2-256")
2068+
TARGET_BUILTIN(__builtin_ia32_vrsqrtpbf16512_mask, "V32yV32yV32yUi", "ncV:512:", "avx10.2-512")
2069+
TARGET_BUILTIN(__builtin_ia32_vreducenepbf16128_mask, "V8yV8yIiV8yUc", "ncV:128:", "avx10.2-256")
2070+
TARGET_BUILTIN(__builtin_ia32_vreducenepbf16256_mask, "V16yV16yIiV16yUs", "ncV:256:", "avx10.2-256")
2071+
TARGET_BUILTIN(__builtin_ia32_vreducenepbf16512_mask, "V32yV32yIiV32yUi", "ncV:512:", "avx10.2-512")
2072+
// Naming differs from the entries above: the width is written "_128"/"_256",
// and the entry with no width suffix is the 512-bit (V:512:) form.
TARGET_BUILTIN(__builtin_ia32_vrndscalenepbf16_128_mask, "V8yV8yIiV8yUc", "ncV:128:", "avx10.2-256")
2073+
TARGET_BUILTIN(__builtin_ia32_vrndscalenepbf16_256_mask, "V16yV16yIiV16yUs", "ncV:256:", "avx10.2-256")
2074+
TARGET_BUILTIN(__builtin_ia32_vrndscalenepbf16_mask, "V32yV32yIiV32yUi", "ncV:512:", "avx10.2-512")
2075+
TARGET_BUILTIN(__builtin_ia32_vgetmantpbf16128_mask, "V8yV8yIiV8yUc", "ncV:128:", "avx10.2-256")
2076+
TARGET_BUILTIN(__builtin_ia32_vgetmantpbf16256_mask, "V16yV16yIiV16yUs", "ncV:256:", "avx10.2-256")
2077+
TARGET_BUILTIN(__builtin_ia32_vgetmantpbf16512_mask, "V32yV32yIiV32yUi", "ncV:512:", "avx10.2-512")
2078+
// Here the entry with no width suffix is the 128-bit (V:128:) form, unlike
// vrndscalenepbf16_mask above where the unsuffixed name is the 512-bit form.
TARGET_BUILTIN(__builtin_ia32_vsqrtnepbf16, "V8yV8y", "ncV:128:", "avx10.2-256")
2079+
TARGET_BUILTIN(__builtin_ia32_vsqrtnepbf16256, "V16yV16y", "ncV:256:", "avx10.2-256")
2080+
TARGET_BUILTIN(__builtin_ia32_vsqrtnepbf16512, "V32yV32y", "ncV:512:", "avx10.2-512")
2081+
TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh512, "V32yV32yV32yV32y", "ncV:512:", "avx10.2-512")
2082+
TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh256, "V16yV16yV16yV16y", "ncV:256:", "avx10.2-256")
2083+
TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh128, "V8yV8yV8yV8y", "ncV:128:", "avx10.2-256")
2084+
20252085
#undef BUILTIN
20262086
#undef TARGET_BUILTIN
20272087
#undef TARGET_HEADER_BUILTIN

clang/lib/Basic/Targets/X86.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
306306
HasAVX10_1_512 = true;
307307
} else if (Feature == "+avx10.2-256") {
308308
HasAVX10_2 = true;
309+
HasFullBFloat16 = true;
309310
} else if (Feature == "+avx10.2-512") {
310311
HasAVX10_2_512 = true;
311312
} else if (Feature == "+avx512cd") {

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14631,6 +14631,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1463114631
case X86::BI__builtin_ia32_storeups512_mask:
1463214632
return EmitX86MaskedStore(*this, Ops, Align(1));
1463314633

14634+
case X86::BI__builtin_ia32_storesbf16128_mask:
1463414635
case X86::BI__builtin_ia32_storesh128_mask:
1463514636
case X86::BI__builtin_ia32_storess128_mask:
1463614637
case X86::BI__builtin_ia32_storesd128_mask:
@@ -14727,6 +14728,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1472714728
case X86::BI__builtin_ia32_vfmaddph512_mask:
1472814729
case X86::BI__builtin_ia32_vfmaddph512_maskz:
1472914730
case X86::BI__builtin_ia32_vfmaddph512_mask3:
14731+
case X86::BI__builtin_ia32_vfmaddnepbh128:
14732+
case X86::BI__builtin_ia32_vfmaddnepbh256:
14733+
case X86::BI__builtin_ia32_vfmaddnepbh512:
1473014734
case X86::BI__builtin_ia32_vfmaddps512_mask:
1473114735
case X86::BI__builtin_ia32_vfmaddps512_maskz:
1473214736
case X86::BI__builtin_ia32_vfmaddps512_mask3:
@@ -14787,6 +14791,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1478714791
case X86::BI__builtin_ia32_loaddqudi512_mask:
1478814792
return EmitX86MaskedLoad(*this, Ops, Align(1));
1478914793

14794+
case X86::BI__builtin_ia32_loadsbf16128_mask:
1479014795
case X86::BI__builtin_ia32_loadsh128_mask:
1479114796
case X86::BI__builtin_ia32_loadss128_mask:
1479214797
case X86::BI__builtin_ia32_loadsd128_mask:
@@ -15941,6 +15946,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1594115946
case X86::BI__builtin_ia32_sqrtph256:
1594215947
case X86::BI__builtin_ia32_sqrtph:
1594315948
case X86::BI__builtin_ia32_sqrtph512:
15949+
case X86::BI__builtin_ia32_vsqrtnepbf16256:
15950+
case X86::BI__builtin_ia32_vsqrtnepbf16:
15951+
case X86::BI__builtin_ia32_vsqrtnepbf16512:
1594415952
case X86::BI__builtin_ia32_sqrtps512:
1594515953
case X86::BI__builtin_ia32_sqrtpd512: {
1594615954
if (Ops.size() == 2) {
@@ -16160,6 +16168,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1616016168
case X86::BI__builtin_ia32_fpclassps128_mask:
1616116169
case X86::BI__builtin_ia32_fpclassps256_mask:
1616216170
case X86::BI__builtin_ia32_fpclassps512_mask:
16171+
case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16172+
case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16173+
case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
1616316174
case X86::BI__builtin_ia32_fpclassph128_mask:
1616416175
case X86::BI__builtin_ia32_fpclassph256_mask:
1616516176
case X86::BI__builtin_ia32_fpclassph512_mask:
@@ -16174,6 +16185,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1617416185
Intrinsic::ID ID;
1617516186
switch (BuiltinID) {
1617616187
default: llvm_unreachable("Unsupported intrinsic!");
16188+
case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16189+
ID = Intrinsic::x86_avx10_fpclass_nepbf16_128;
16190+
break;
16191+
case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16192+
ID = Intrinsic::x86_avx10_fpclass_nepbf16_256;
16193+
break;
16194+
case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
16195+
ID = Intrinsic::x86_avx10_fpclass_nepbf16_512;
16196+
break;
1617716197
case X86::BI__builtin_ia32_fpclassph128_mask:
1617816198
ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
1617916199
break;
@@ -16329,6 +16349,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1632916349
case X86::BI__builtin_ia32_cmppd128_mask:
1633016350
case X86::BI__builtin_ia32_cmppd256_mask:
1633116351
case X86::BI__builtin_ia32_cmppd512_mask:
16352+
case X86::BI__builtin_ia32_vcmppbf16512_mask:
16353+
case X86::BI__builtin_ia32_vcmppbf16256_mask:
16354+
case X86::BI__builtin_ia32_vcmppbf16128_mask:
1633216355
IsMaskFCmp = true;
1633316356
[[fallthrough]];
1633416357
case X86::BI__builtin_ia32_cmpps:

clang/lib/Headers/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,9 @@ set(x86_files
147147
amxcomplexintrin.h
148148
amxfp16intrin.h
149149
amxintrin.h
150+
avx10_2_512bf16intrin.h
150151
avx10_2_512niintrin.h
152+
avx10_2bf16intrin.h
151153
avx10_2niintrin.h
152154
avx2intrin.h
153155
avx512bf16intrin.h

0 commit comments

Comments
 (0)