-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Don't promote f16 FNEG/FABS with Zfhmin/Zhinxmin. #106474
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
fneg/fabs are not supposed to canonicalize NaNs. Promoting to f32 will go through an fp_extend, which will canonicalize. We need to use integer bit manipulation to flip (fneg) or clear (fabs) the sign bit instead. Unfortunately, this currently goes through the stack because i16 is not a legal type. Fixing that will require custom legalization or some other generic SelectionDAG change.
@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) Changes: fneg/fabs are not supposed to canonicalize NaNs. Promoting to f32 will go through an fp_extend, which will canonicalize. The generic Promote handler needs to be removed from LegalizeDAG. We need to use integer bit manipulation to flip (fneg) or clear (fabs) the sign bit instead. Unfortunately, this currently goes through the stack because i16 is not a legal type. Fixing that will require custom legalization or some other generic SelectionDAG change. CC: @v01dXYZ Patch is 107.61 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/106474.diff 7 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 790107b772fcb3..15ce730e23dd3b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -395,7 +395,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::FADD, ISD::FSUB,
ISD::FMUL, ISD::FMA,
ISD::FDIV, ISD::FSQRT,
- ISD::FABS, ISD::FNEG,
ISD::STRICT_FMA, ISD::STRICT_FADD,
ISD::STRICT_FSUB, ISD::STRICT_FMUL,
ISD::STRICT_FDIV, ISD::STRICT_FSQRT,
@@ -416,6 +415,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
setOperationAction(ISD::FREM, MVT::bf16, Promote);
+ setOperationAction(ISD::FABS, MVT::bf16, Expand);
+ setOperationAction(ISD::FNEG, MVT::bf16, Expand);
// FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
// DAGCombiner::visitFP_ROUND probably needs improvements first.
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
@@ -433,6 +434,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
MVT::f16, Legal);
+ setOperationAction(ISD::FABS, MVT::f16, Expand);
+ setOperationAction(ISD::FNEG, MVT::f16, Expand);
// FIXME: Need to promote f16 FCOPYSIGN to f32, but the
// DAGCombiner::visitFP_ROUND probably needs improvements first.
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
diff --git a/llvm/test/CodeGen/RISCV/bfloat-arith.ll b/llvm/test/CodeGen/RISCV/bfloat-arith.ll
index 632e933c595671..56a30dd0f6ffee 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-arith.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-arith.ll
@@ -105,17 +105,39 @@ define bfloat @fsgnj_s(bfloat %a, bfloat %b) nounwind {
}
define i32 @fneg_s(bfloat %a, bfloat %b) nounwind {
-; CHECK-LABEL: fneg_s:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: fadd.s fa5, fa5, fa5
-; CHECK-NEXT: fcvt.bf16.s fa5, fa5
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT: fneg.s fa4, fa5
-; CHECK-NEXT: fcvt.bf16.s fa4, fa4
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa4
-; CHECK-NEXT: feq.s a0, fa5, fa4
-; CHECK-NEXT: ret
+; RV32IZFBFMIN-LABEL: fneg_s:
+; RV32IZFBFMIN: # %bb.0:
+; RV32IZFBFMIN-NEXT: addi sp, sp, -16
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; RV32IZFBFMIN-NEXT: fadd.s fa5, fa5, fa5
+; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT: fsh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT: lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT: xori a0, a0, 128
+; RV32IZFBFMIN-NEXT: sb a0, 13(sp)
+; RV32IZFBFMIN-NEXT: flh fa4, 12(sp)
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa4
+; RV32IZFBFMIN-NEXT: feq.s a0, fa5, fa4
+; RV32IZFBFMIN-NEXT: addi sp, sp, 16
+; RV32IZFBFMIN-NEXT: ret
+;
+; RV64IZFBFMIN-LABEL: fneg_s:
+; RV64IZFBFMIN: # %bb.0:
+; RV64IZFBFMIN-NEXT: addi sp, sp, -16
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; RV64IZFBFMIN-NEXT: fadd.s fa5, fa5, fa5
+; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT: fsh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT: lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT: xori a0, a0, 128
+; RV64IZFBFMIN-NEXT: sb a0, 9(sp)
+; RV64IZFBFMIN-NEXT: flh fa4, 8(sp)
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa4
+; RV64IZFBFMIN-NEXT: feq.s a0, fa5, fa4
+; RV64IZFBFMIN-NEXT: addi sp, sp, 16
+; RV64IZFBFMIN-NEXT: ret
%1 = fadd bfloat %a, %a
%2 = fneg bfloat %1
%3 = fcmp oeq bfloat %1, %2
@@ -131,9 +153,11 @@ define bfloat @fsgnjn_s(bfloat %a, bfloat %b) nounwind {
; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0
; RV32IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5
; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
-; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
-; RV32IZFBFMIN-NEXT: fneg.s fa5, fa5
-; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT: fsh fa5, 4(sp)
+; RV32IZFBFMIN-NEXT: lbu a0, 5(sp)
+; RV32IZFBFMIN-NEXT: xori a0, a0, 128
+; RV32IZFBFMIN-NEXT: sb a0, 5(sp)
+; RV32IZFBFMIN-NEXT: flh fa5, 4(sp)
; RV32IZFBFMIN-NEXT: fsh fa0, 8(sp)
; RV32IZFBFMIN-NEXT: fsh fa5, 12(sp)
; RV32IZFBFMIN-NEXT: lbu a0, 9(sp)
@@ -148,24 +172,26 @@ define bfloat @fsgnjn_s(bfloat %a, bfloat %b) nounwind {
;
; RV64IZFBFMIN-LABEL: fsgnjn_s:
; RV64IZFBFMIN: # %bb.0:
-; RV64IZFBFMIN-NEXT: addi sp, sp, -16
+; RV64IZFBFMIN-NEXT: addi sp, sp, -32
; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1
; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0
; RV64IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5
; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
-; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
-; RV64IZFBFMIN-NEXT: fneg.s fa5, fa5
-; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
-; RV64IZFBFMIN-NEXT: fsh fa0, 0(sp)
; RV64IZFBFMIN-NEXT: fsh fa5, 8(sp)
-; RV64IZFBFMIN-NEXT: lbu a0, 1(sp)
-; RV64IZFBFMIN-NEXT: lbu a1, 9(sp)
+; RV64IZFBFMIN-NEXT: lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT: xori a0, a0, 128
+; RV64IZFBFMIN-NEXT: sb a0, 9(sp)
+; RV64IZFBFMIN-NEXT: flh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT: fsh fa0, 16(sp)
+; RV64IZFBFMIN-NEXT: fsh fa5, 24(sp)
+; RV64IZFBFMIN-NEXT: lbu a0, 17(sp)
+; RV64IZFBFMIN-NEXT: lbu a1, 25(sp)
; RV64IZFBFMIN-NEXT: andi a0, a0, 127
; RV64IZFBFMIN-NEXT: andi a1, a1, 128
; RV64IZFBFMIN-NEXT: or a0, a0, a1
-; RV64IZFBFMIN-NEXT: sb a0, 1(sp)
-; RV64IZFBFMIN-NEXT: flh fa0, 0(sp)
-; RV64IZFBFMIN-NEXT: addi sp, sp, 16
+; RV64IZFBFMIN-NEXT: sb a0, 17(sp)
+; RV64IZFBFMIN-NEXT: flh fa0, 16(sp)
+; RV64IZFBFMIN-NEXT: addi sp, sp, 32
; RV64IZFBFMIN-NEXT: ret
%1 = fadd bfloat %a, %b
%2 = fneg bfloat %1
@@ -176,19 +202,43 @@ define bfloat @fsgnjn_s(bfloat %a, bfloat %b) nounwind {
declare bfloat @llvm.fabs.bf16(bfloat)
define bfloat @fabs_s(bfloat %a, bfloat %b) nounwind {
-; CHECK-LABEL: fabs_s:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa0
-; CHECK-NEXT: fadd.s fa5, fa4, fa5
-; CHECK-NEXT: fcvt.bf16.s fa5, fa5
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT: fabs.s fa4, fa5
-; CHECK-NEXT: fcvt.bf16.s fa4, fa4
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa4
-; CHECK-NEXT: fadd.s fa5, fa4, fa5
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
-; CHECK-NEXT: ret
+; RV32IZFBFMIN-LABEL: fabs_s:
+; RV32IZFBFMIN: # %bb.0:
+; RV32IZFBFMIN-NEXT: addi sp, sp, -16
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0
+; RV32IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5
+; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT: fsh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT: lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT: andi a0, a0, 127
+; RV32IZFBFMIN-NEXT: sb a0, 13(sp)
+; RV32IZFBFMIN-NEXT: flh fa4, 12(sp)
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa4
+; RV32IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5
+; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
+; RV32IZFBFMIN-NEXT: addi sp, sp, 16
+; RV32IZFBFMIN-NEXT: ret
+;
+; RV64IZFBFMIN-LABEL: fabs_s:
+; RV64IZFBFMIN: # %bb.0:
+; RV64IZFBFMIN-NEXT: addi sp, sp, -16
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0
+; RV64IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5
+; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT: fsh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT: lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT: andi a0, a0, 127
+; RV64IZFBFMIN-NEXT: sb a0, 9(sp)
+; RV64IZFBFMIN-NEXT: flh fa4, 8(sp)
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa4
+; RV64IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5
+; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
+; RV64IZFBFMIN-NEXT: addi sp, sp, 16
+; RV64IZFBFMIN-NEXT: ret
%1 = fadd bfloat %a, %b
%2 = call bfloat @llvm.fabs.bf16(bfloat %1)
%3 = fadd bfloat %2, %1
@@ -239,21 +289,45 @@ define bfloat @fmadd_s(bfloat %a, bfloat %b, bfloat %c) nounwind {
}
define bfloat @fmsub_s(bfloat %a, bfloat %b, bfloat %c) nounwind {
-; CHECK-LABEL: fmsub_s:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa2
-; CHECK-NEXT: fmv.w.x fa4, zero
-; CHECK-NEXT: fadd.s fa5, fa5, fa4
-; CHECK-NEXT: fcvt.bf16.s fa5, fa5
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT: fneg.s fa5, fa5
-; CHECK-NEXT: fcvt.bf16.s fa5, fa5
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa3, fa0
-; CHECK-NEXT: fmadd.s fa5, fa3, fa4, fa5
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
-; CHECK-NEXT: ret
+; RV32IZFBFMIN-LABEL: fmsub_s:
+; RV32IZFBFMIN: # %bb.0:
+; RV32IZFBFMIN-NEXT: addi sp, sp, -16
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa2
+; RV32IZFBFMIN-NEXT: fmv.w.x fa4, zero
+; RV32IZFBFMIN-NEXT: fadd.s fa5, fa5, fa4
+; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT: fsh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT: lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT: xori a0, a0, 128
+; RV32IZFBFMIN-NEXT: sb a0, 13(sp)
+; RV32IZFBFMIN-NEXT: flh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa1
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa3, fa0
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
+; RV32IZFBFMIN-NEXT: fmadd.s fa5, fa3, fa4, fa5
+; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
+; RV32IZFBFMIN-NEXT: addi sp, sp, 16
+; RV32IZFBFMIN-NEXT: ret
+;
+; RV64IZFBFMIN-LABEL: fmsub_s:
+; RV64IZFBFMIN: # %bb.0:
+; RV64IZFBFMIN-NEXT: addi sp, sp, -16
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa2
+; RV64IZFBFMIN-NEXT: fmv.w.x fa4, zero
+; RV64IZFBFMIN-NEXT: fadd.s fa5, fa5, fa4
+; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT: fsh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT: lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT: xori a0, a0, 128
+; RV64IZFBFMIN-NEXT: sb a0, 9(sp)
+; RV64IZFBFMIN-NEXT: flh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa1
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa3, fa0
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
+; RV64IZFBFMIN-NEXT: fmadd.s fa5, fa3, fa4, fa5
+; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
+; RV64IZFBFMIN-NEXT: addi sp, sp, 16
+; RV64IZFBFMIN-NEXT: ret
%c_ = fadd bfloat 0.0, %c ; avoid negation using xor
%negc = fsub bfloat -0.0, %c_
%1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %negc)
@@ -261,27 +335,61 @@ define bfloat @fmsub_s(bfloat %a, bfloat %b, bfloat %c) nounwind {
}
define bfloat @fnmadd_s(bfloat %a, bfloat %b, bfloat %c) nounwind {
-; CHECK-LABEL: fnmadd_s:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: fmv.w.x fa4, zero
-; CHECK-NEXT: fadd.s fa5, fa5, fa4
-; CHECK-NEXT: fcvt.bf16.s fa5, fa5
-; CHECK-NEXT: fcvt.s.bf16 fa3, fa2
-; CHECK-NEXT: fadd.s fa4, fa3, fa4
-; CHECK-NEXT: fcvt.bf16.s fa4, fa4
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT: fneg.s fa5, fa5
-; CHECK-NEXT: fcvt.bf16.s fa5, fa5
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa4
-; CHECK-NEXT: fneg.s fa4, fa4
-; CHECK-NEXT: fcvt.bf16.s fa4, fa4
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa4
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT: fcvt.s.bf16 fa3, fa1
-; CHECK-NEXT: fmadd.s fa5, fa5, fa3, fa4
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
-; CHECK-NEXT: ret
+; RV32IZFBFMIN-LABEL: fnmadd_s:
+; RV32IZFBFMIN: # %bb.0:
+; RV32IZFBFMIN-NEXT: addi sp, sp, -16
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; RV32IZFBFMIN-NEXT: fmv.w.x fa4, zero
+; RV32IZFBFMIN-NEXT: fadd.s fa5, fa5, fa4
+; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT: fsh fa5, 8(sp)
+; RV32IZFBFMIN-NEXT: lbu a0, 9(sp)
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa2
+; RV32IZFBFMIN-NEXT: fadd.s fa5, fa5, fa4
+; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT: xori a0, a0, 128
+; RV32IZFBFMIN-NEXT: sb a0, 9(sp)
+; RV32IZFBFMIN-NEXT: flh fa4, 8(sp)
+; RV32IZFBFMIN-NEXT: fsh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT: lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT: xori a0, a0, 128
+; RV32IZFBFMIN-NEXT: sb a0, 13(sp)
+; RV32IZFBFMIN-NEXT: flh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa3, fa1
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa4
+; RV32IZFBFMIN-NEXT: fmadd.s fa5, fa4, fa3, fa5
+; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
+; RV32IZFBFMIN-NEXT: addi sp, sp, 16
+; RV32IZFBFMIN-NEXT: ret
+;
+; RV64IZFBFMIN-LABEL: fnmadd_s:
+; RV64IZFBFMIN: # %bb.0:
+; RV64IZFBFMIN-NEXT: addi sp, sp, -16
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; RV64IZFBFMIN-NEXT: fmv.w.x fa4, zero
+; RV64IZFBFMIN-NEXT: fadd.s fa5, fa5, fa4
+; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT: fsh fa5, 0(sp)
+; RV64IZFBFMIN-NEXT: lbu a0, 1(sp)
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa2
+; RV64IZFBFMIN-NEXT: fadd.s fa5, fa5, fa4
+; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT: xori a0, a0, 128
+; RV64IZFBFMIN-NEXT: sb a0, 1(sp)
+; RV64IZFBFMIN-NEXT: flh fa4, 0(sp)
+; RV64IZFBFMIN-NEXT: fsh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT: lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT: xori a0, a0, 128
+; RV64IZFBFMIN-NEXT: sb a0, 9(sp)
+; RV64IZFBFMIN-NEXT: flh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa3, fa1
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa4
+; RV64IZFBFMIN-NEXT: fmadd.s fa5, fa4, fa3, fa5
+; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
+; RV64IZFBFMIN-NEXT: addi sp, sp, 16
+; RV64IZFBFMIN-NEXT: ret
%a_ = fadd bfloat 0.0, %a
%c_ = fadd bfloat 0.0, %c
%nega = fsub bfloat -0.0, %a_
@@ -291,27 +399,61 @@ define bfloat @fnmadd_s(bfloat %a, bfloat %b, bfloat %c) nounwind {
}
define bfloat @fnmadd_s_2(bfloat %a, bfloat %b, bfloat %c) nounwind {
-; CHECK-LABEL: fnmadd_s_2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa1
-; CHECK-NEXT: fmv.w.x fa4, zero
-; CHECK-NEXT: fadd.s fa5, fa5, fa4
-; CHECK-NEXT: fcvt.bf16.s fa5, fa5
-; CHECK-NEXT: fcvt.s.bf16 fa3, fa2
-; CHECK-NEXT: fadd.s fa4, fa3, fa4
-; CHECK-NEXT: fcvt.bf16.s fa4, fa4
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT: fneg.s fa5, fa5
-; CHECK-NEXT: fcvt.bf16.s fa5, fa5
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa4
-; CHECK-NEXT: fneg.s fa4, fa4
-; CHECK-NEXT: fcvt.bf16.s fa4, fa4
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa4
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT: fcvt.s.bf16 fa3, fa0
-; CHECK-NEXT: fmadd.s fa5, fa3, fa5, fa4
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
-; CHECK-NEXT: ret
+; RV32IZFBFMIN-LABEL: fnmadd_s_2:
+; RV32IZFBFMIN: # %bb.0:
+; RV32IZFBFMIN-NEXT: addi sp, sp, -16
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1
+; RV32IZFBFMIN-NEXT: fmv.w.x fa4, zero
+; RV32IZFBFMIN-NEXT: fadd.s fa5, fa5, fa4
+; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT: fsh fa5, 8(sp)
+; RV32IZFBFMIN-NEXT: lbu a0, 9(sp)
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa2
+; RV32IZFBFMIN-NEXT: fadd.s fa5, fa5, fa4
+; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT: xori a0, a0, 128
+; RV32IZFBFMIN-NEXT: sb a0, 9(sp)
+; RV32IZFBFMIN-NEXT: flh fa4, 8(sp)
+; RV32IZFBFMIN-NEXT: fsh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT: lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT: xori a0, a0, 128
+; RV32IZFBFMIN-NEXT: sb a0, 13(sp)
+; RV32IZFBFMIN-NEXT: flh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa3, fa0
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa4
+; RV32IZFBFMIN-NEXT: fmadd.s fa5, fa3, fa4, fa5
+; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
+; RV32IZFBFMIN-NEXT: addi sp, sp, 16
+; RV32IZFBFMIN-NEXT: ret
+;
+; RV64IZFBFMIN-LABEL: fnmadd_s_2:
+; RV64IZFBFMIN: # %bb.0:
+; RV64IZFBFMIN-NEXT: addi sp, sp, -16
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1
+; RV64IZFBFMIN-NEXT: fmv.w.x fa4, zero
+; RV64IZFBFMIN-NEXT: fadd.s fa5, fa5, fa4
+; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT: fsh fa5, 0(sp)
+; RV64IZFBFMIN-NEXT: lbu a0, 1(sp)
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa2
+; RV64IZFBFMIN-NEXT: fadd.s fa5, fa5, fa4
+; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT: xori a0, a0, 128
+; RV64IZFBFMIN-NEXT: sb a0, 1(sp)
+; RV64IZFBFMIN-NEXT: flh fa4, 0(sp)
+; RV64IZFBFMIN-NEXT: fsh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT: lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT: xori a0, a0, 128
+; RV64IZFBFMIN-NEXT: sb a0, 9(sp)
+; RV64IZFBFMIN-NEXT: flh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa3, fa0
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa4
+; RV64IZFBFMIN-NEXT: fmadd.s fa5, fa3, fa4, fa5
+; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
+; RV64IZFBFMIN-NEXT: addi sp, sp, 16
+; RV64IZFBFMIN-NEXT: ret
%b_ = fadd bfloat 0.0, %b
%c_ = fadd bfloat 0.0, %c
%negb = fsub bfloat -0.0, %b_
@@ -321,17 +463,37 @@ define bfloat @fnmadd_s_2(bfloat %a, bfloat %b, bfloat %c) nounwind {
}
define bfloat @fnmadd_s_3(bfloat %a, bfloat %b, bfloat %c) nounwind {
-; CHECK-LABEL: fnmadd_s_3:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa2
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa3, fa0
-; CHECK-NEXT: fmadd.s fa5, fa3, fa4, fa5
-; CHECK-NEXT: fcvt.bf16.s fa5, fa5
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT: fneg.s fa5, fa5
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
-; CHECK-NEXT: ret
+; RV32IZFBFMIN-LABEL: fnmadd_s_3:
+; RV32IZFBFMIN: # %bb.0:
+; RV32IZFBFMIN-NEXT: addi sp, sp, -16
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa2
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa1
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa3, fa0
+; RV32IZFBFMIN-NEXT: fmadd.s fa5, fa3, fa4, fa5
+; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT: fsh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT: lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT: xori a0, a0, 128
+; RV32IZFBFMIN-NEXT: sb a0, 13(sp)
+; RV32IZFBFMIN-NEXT: flh fa0, 12(sp)
+; RV32IZFBFMIN-NEXT: addi sp, sp, 16
+; RV32IZFBFMIN-NEXT: ret
+;
+; RV64IZFBFMIN-LABEL: fnmadd_s_3:
+; RV64IZFBFMIN: # %bb.0:
+; RV64IZFBFMIN-NEXT: addi sp, sp, -16
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa2
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa1
+; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa3, fa0
+; RV64IZFBFMIN-NEXT: fmadd.s fa5, fa3, fa4, fa5
+; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
+; RV64IZFBFMIN-NEXT: fsh fa5, 8(sp)
+; RV64IZFBFMIN-NEXT: lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT: xori a0, a0, 128
+; RV64IZFBFMIN-NEXT: sb a0, 9(sp)
+; RV64IZFBFMIN-NEXT: flh fa0, 8(sp)
+; RV64IZFBFMIN-NEXT: addi sp, sp, 16
+; RV64IZFBFMIN-NEXT: ret
%1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
%neg = fneg bfloat %1
ret bfloat %neg
@@ -339,38 +501,82 @@ define bfloat @fnmadd_s_3(bfloat %a, bfloat %b, bfloat %c) nounwind {
define bfloat @fnmadd_nsz(bfloat %a, bfloat %b, bfloat %c) nounwind {
-; CHECK-LABEL: fnmadd_nsz:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa2
-; CHECK-NEXT: fcvt.s.bf16 fa4, fa1
-; CHECK-NEXT: fcvt.s.bf16 fa3, fa0
-; CHECK-NEXT: fmadd.s fa5, fa3, fa4, fa5
-; CHECK-NEXT: fcvt.bf16.s fa5, fa5
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa5
-; CHECK-NEXT: fneg.s fa5, fa5
-; CHECK-NEXT: fcvt.bf16.s fa0, fa5
-; CHECK-NEXT: ret
+; RV32IZFBFMIN-LABEL: fnmadd_nsz:
+; RV32IZFBFMIN: # %bb.0:
+; RV32IZFBFMIN-NEXT: addi sp, sp, -16
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa2
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa1
+; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa3, fa0
+; RV32IZFBFMIN-NEXT: fmadd.s fa5, fa3, fa4, fa5
+; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
+; RV32IZFBFMIN-NEXT: fsh fa5, 12(sp)
+; RV32IZFBFMIN-NEXT: lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT: xori a0, a0, 128
+; RV32IZFBFMIN-NEXT: sb a0, 13(sp)
+; RV32IZFBFMIN-NEXT: flh fa0, 12(sp)
+; RV32IZFBFMIN-NEXT: addi sp, sp, 16
+; RV32IZFBFMIN-NEXT: ret
+;
+; RV64IZFBFMIN-LABEL: fnmadd_nsz:
+; RV64IZFBFMIN: # %bb.0:
+; RV64IZFBFMIN-NEXT: addi sp, sp, -16
+...
[truncated]
|
You can test this locally with the following command: git-clang-format --diff a7ba73bf614f6d147bd1cdaddee156bd85e31703 ebfddfd88d794efbe8834644685ba6c186ad8d52 --extensions cpp -- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5f18207262..41e8cdd961 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -390,19 +390,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
static const unsigned ZfhminZfbfminPromoteOps[] = {
- ISD::FMINNUM, ISD::FMAXNUM,
- ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM,
- ISD::FADD, ISD::FSUB,
- ISD::FMUL, ISD::FMA,
- ISD::FDIV, ISD::FSQRT,
- ISD::STRICT_FMA, ISD::STRICT_FADD,
- ISD::STRICT_FSUB, ISD::STRICT_FMUL,
- ISD::STRICT_FDIV, ISD::STRICT_FSQRT,
- ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
- ISD::SETCC, ISD::FCEIL,
- ISD::FFLOOR, ISD::FTRUNC,
- ISD::FRINT, ISD::FROUND,
- ISD::FROUNDEVEN, ISD::SELECT};
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM,
+ ISD::FMINIMUMNUM, ISD::FADD, ISD::FSUB,
+ ISD::FMUL, ISD::FMA, ISD::FDIV,
+ ISD::FSQRT, ISD::STRICT_FMA, ISD::STRICT_FADD,
+ ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
+ ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
+ ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
+ ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
+ ISD::FROUNDEVEN, ISD::SELECT};
if (Subtarget.hasStdExtZfbfmin()) {
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also note that FCOPYSIGN has the same issue
@@ -416,6 +415,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
setOperationAction(ISD::FREM, MVT::bf16, Promote);
setOperationAction(ISD::FABS, MVT::bf16, Expand);
setOperationAction(ISD::FNEG, MVT::bf16, Expand);
// FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
// DAGCombiner::visitFP_ROUND probably needs improvements first.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This FIXME comment should be removed now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
fneg/fabs are not supposed to canonicalize NaNs. Promoting to f32 will go through an fp_extend, which will canonicalize. The generic Promote handler needs to be removed from LegalizeDAG.
We need to use integer bit manipulation to flip (fneg) or clear (fabs) the sign bit instead.
Unfortunately, this currently goes through the stack because i16 is not a legal type. Fixing that will require custom legalization or some other generic SelectionDAG change.
CC: @v01dXYZ