-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86][AVX10.2] Fix wrong predicates for BF16 feature #113800
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Starting with AVX10.2, 128-bit and 256-bit vectors need to be enabled by default, and 512-bit vectors need to be guarded by a check for the 512-bit feature.
@llvm/pr-subscribers-backend-x86 Author: Phoebe Wang (phoebewang) Changes: Starting with AVX10.2, 128-bit and 256-bit vectors need to be enabled by default, and 512-bit vectors need to be guarded by a check for the 512-bit feature. Full diff: https://github.com/llvm/llvm-project/pull/113800.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a6d77873ec2901..9d447959faf55a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2406,7 +2406,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addLegalFPImmediate(APFloat::getZero(APFloat::BFloat()));
}
- if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) {
+ if (!Subtarget.useSoftFloat() && Subtarget.hasBF16() &&
+ Subtarget.useAVX512Regs()) {
addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
setF16Action(MVT::v32bf16, Expand);
for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV})
@@ -2419,27 +2420,23 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX10_2()) {
- addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass);
- addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass);
- addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
-
- setOperationAction(ISD::FADD, MVT::v32bf16, Legal);
- setOperationAction(ISD::FSUB, MVT::v32bf16, Legal);
- setOperationAction(ISD::FMUL, MVT::v32bf16, Legal);
- setOperationAction(ISD::FDIV, MVT::v32bf16, Legal);
- setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal);
- setOperationAction(ISD::FMA, MVT::v32bf16, Legal);
- setOperationAction(ISD::SETCC, MVT::v32bf16, Custom);
- if (Subtarget.hasVLX()) {
- for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
- setOperationAction(ISD::FADD, VT, Legal);
- setOperationAction(ISD::FSUB, VT, Legal);
- setOperationAction(ISD::FMUL, VT, Legal);
- setOperationAction(ISD::FDIV, VT, Legal);
- setOperationAction(ISD::FSQRT, VT, Legal);
- setOperationAction(ISD::FMA, VT, Legal);
- setOperationAction(ISD::SETCC, VT, Custom);
- }
+ for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
+ setOperationAction(ISD::FADD, VT, Legal);
+ setOperationAction(ISD::FSUB, VT, Legal);
+ setOperationAction(ISD::FMUL, VT, Legal);
+ setOperationAction(ISD::FDIV, VT, Legal);
+ setOperationAction(ISD::FSQRT, VT, Legal);
+ setOperationAction(ISD::FMA, VT, Legal);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ }
+ if (Subtarget.hasAVX10_2_512()) {
+ setOperationAction(ISD::FADD, MVT::v32bf16, Legal);
+ setOperationAction(ISD::FSUB, MVT::v32bf16, Legal);
+ setOperationAction(ISD::FMUL, MVT::v32bf16, Legal);
+ setOperationAction(ISD::FDIV, MVT::v32bf16, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal);
+ setOperationAction(ISD::FMA, MVT::v32bf16, Legal);
+ setOperationAction(ISD::SETCC, MVT::v32bf16, Custom);
}
}
diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
index e0f5679e8ac96d..c97d27ff324bbb 100644
--- a/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
+++ b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
@@ -1166,3 +1166,25 @@ entry:
%2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer
ret <8 x bfloat> %2
}
+
+define <32 x bfloat> @addv(<32 x bfloat> %a, <32 x bfloat> %b) nounwind {
+; X64-LABEL: addv:
+; X64: # %bb.0:
+; X64-NEXT: vaddnepbf16 %ymm2, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc2]
+; X64-NEXT: vaddnepbf16 %ymm3, %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xcb]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: addv:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp # encoding: [0x55]
+; X86-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5]
+; X86-NEXT: andl $-32, %esp # encoding: [0x83,0xe4,0xe0]
+; X86-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20]
+; X86-NEXT: vaddnepbf16 %ymm2, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc2]
+; X86-NEXT: vaddnepbf16 8(%ebp), %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x58,0x8d,0x08,0x00,0x00,0x00]
+; X86-NEXT: movl %ebp, %esp # encoding: [0x89,0xec]
+; X86-NEXT: popl %ebp # encoding: [0x5d]
+; X86-NEXT: retl # encoding: [0xc3]
+ %add = fadd <32 x bfloat> %a, %b
+ ret <32 x bfloat> %add
+}
|
@@ -2419,27 +2420,23 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, | |||
} | |||
|
|||
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX10_2()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't need to check VLX here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, we don't. VLX is a concept from the legacy AVX512 features. We had to reuse it for AVX10.1, but starting with AVX10.2 we no longer need it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/30/builds/8974. Here is the relevant piece of the build log for reference:
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/7624. Here is the relevant piece of the build log for reference:
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/139/builds/5472. Here is the relevant piece of the build log for reference:
|
Since AVX10.2, we need to enable 128/256-bit vector by default and check for 512 feature for 512-bit vector.
Since AVX10.2, we need to enable 128/256-bit vector by default and check for 512 feature for 512-bit vector.