-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Custom legalize f16/bf16 FCOPYSIGN with Zfhmin/Zfbfmin. #107039
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
The LegalizeDAG expansion will go through memory since i16 isn't a legal type. Avoid this by using FMV nodes, similar to what we did in llvm#106886 for FNEG and FABS. Special care is needed to handle the Sign operand being a different type.
@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) Changes: The LegalizeDAG expansion will go through memory since i16 isn't a legal type. Avoid this by using FMV nodes, similar to what we did in #106886 for FNEG and FABS. Special care is needed to handle the Sign operand being a different type. Patch is 37.12 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/107039.diff 6 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 250d1c60b9f59e..f50d378ed97aa6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -459,7 +459,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FREM, MVT::bf16, Promote);
setOperationAction(ISD::FABS, MVT::bf16, Custom);
setOperationAction(ISD::FNEG, MVT::bf16, Custom);
- setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Custom);
}
if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
@@ -477,7 +477,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(Op, MVT::f16, Custom);
setOperationAction(ISD::FABS, MVT::f16, Custom);
setOperationAction(ISD::FNEG, MVT::f16, Custom);
- setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
}
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
@@ -5964,6 +5964,69 @@ static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
}
+static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
+
+ MVT XLenVT = Subtarget.getXLenVT();
+ MVT VT = Op.getSimpleValueType();
+ assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
+
+ SDValue Mag = Op.getOperand(0);
+ SDValue Sign = Op.getOperand(1);
+
+ SDLoc DL(Op);
+
+ // Get sign bit into an integer value.
+ SDValue SignAsInt;
+ unsigned SignSize = Sign.getValueSizeInBits();
+ if (SignSize == Subtarget.getXLen()) {
+ SignAsInt = DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
+ } else if (SignSize == 16) {
+ SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
+ } else if (SignSize == 32) {
+ SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
+ } else if (SignSize == 64) {
+ assert(XLenVT == MVT::i32 && "Unexpected type");
+ // Copy the upper word to integer.
+ SignAsInt = DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
+ .getValue(1);
+ SignSize = 32;
+ } else
+ llvm_unreachable("Unexpected sign size");
+
+ // Get the signbit at the right position for MagAsInt.
+ int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits();
+ if (ShiftAmount > 0) {
+ SignAsInt = DAG.getNode(ISD::SRL, DL, XLenVT, SignAsInt,
+ DAG.getConstant(ShiftAmount, DL, XLenVT));
+ } else if (ShiftAmount < 0) {
+ SignAsInt = DAG.getNode(ISD::SHL, DL, XLenVT, SignAsInt,
+ DAG.getConstant(-ShiftAmount, DL, XLenVT));
+ }
+
+ // Mask the sign bit and any bits above it. The extra bits will be dropped
+ // when we convert back to FP.
+ SDValue SignMask = DAG.getConstant(
+ APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
+ SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
+
+ // Transform Mag value to integer, and clear the sign bit.
+ SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
+ SDValue ClearSignMask = DAG.getConstant(
+ APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
+ SDValue ClearedSign =
+ DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
+
+ SDNodeFlags Flags;
+ Flags.setDisjoint(true);
+
+ SDValue CopiedSign =
+ DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit, Flags);
+
+ return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
+}
+
/// Get a RISC-V target specified VL op for a given SDNode.
static unsigned getRISCVVLOp(SDValue Op) {
#define OP_CASE(NODE) \
@@ -7164,6 +7227,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VSELECT:
return lowerFixedLengthVectorSelectToRVV(Op, DAG);
case ISD::FCOPYSIGN:
+ if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
+ return lowerFCOPYSIGN(Op, DAG, Subtarget);
if (Op.getValueType() == MVT::nxv32f16 &&
(Subtarget.hasVInstructionsF16Minimal() &&
!Subtarget.hasVInstructionsF16()))
diff --git a/llvm/test/CodeGen/RISCV/bfloat-arith.ll b/llvm/test/CodeGen/RISCV/bfloat-arith.ll
index 965d7e0132e60b..b688af4234e65a 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-arith.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-arith.ll
@@ -73,42 +73,26 @@ declare bfloat @llvm.copysign.bf16(bfloat, bfloat)
define bfloat @fsgnj_bf16(bfloat %a, bfloat %b) nounwind {
; RV32IZFBFMIN-LABEL: fsgnj_bf16:
; RV32IZFBFMIN: # %bb.0:
-; RV32IZFBFMIN-NEXT: addi sp, sp, -16
-; RV32IZFBFMIN-NEXT: fsh fa1, 12(sp)
-; RV32IZFBFMIN-NEXT: lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT: fmv.x.h a0, fa1
+; RV32IZFBFMIN-NEXT: lui a1, 1048568
+; RV32IZFBFMIN-NEXT: and a0, a0, a1
; RV32IZFBFMIN-NEXT: fmv.x.h a1, fa0
; RV32IZFBFMIN-NEXT: slli a1, a1, 17
-; RV32IZFBFMIN-NEXT: andi a2, a0, 128
-; RV32IZFBFMIN-NEXT: srli a0, a1, 17
-; RV32IZFBFMIN-NEXT: beqz a2, .LBB5_2
-; RV32IZFBFMIN-NEXT: # %bb.1:
-; RV32IZFBFMIN-NEXT: lui a1, 1048568
-; RV32IZFBFMIN-NEXT: or a0, a0, a1
-; RV32IZFBFMIN-NEXT: .LBB5_2:
-; RV32IZFBFMIN-NEXT: fmv.h.x fa5, a0
-; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
-; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
-; RV32IZFBFMIN-NEXT: addi sp, sp, 16
+; RV32IZFBFMIN-NEXT: srli a1, a1, 17
+; RV32IZFBFMIN-NEXT: or a0, a1, a0
+; RV32IZFBFMIN-NEXT: fmv.h.x fa0, a0
; RV32IZFBFMIN-NEXT: ret
;
; RV64IZFBFMIN-LABEL: fsgnj_bf16:
; RV64IZFBFMIN: # %bb.0:
-; RV64IZFBFMIN-NEXT: addi sp, sp, -16
-; RV64IZFBFMIN-NEXT: fsh fa1, 8(sp)
-; RV64IZFBFMIN-NEXT: lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT: fmv.x.h a0, fa1
+; RV64IZFBFMIN-NEXT: lui a1, 1048568
+; RV64IZFBFMIN-NEXT: and a0, a0, a1
; RV64IZFBFMIN-NEXT: fmv.x.h a1, fa0
; RV64IZFBFMIN-NEXT: slli a1, a1, 49
-; RV64IZFBFMIN-NEXT: andi a2, a0, 128
-; RV64IZFBFMIN-NEXT: srli a0, a1, 49
-; RV64IZFBFMIN-NEXT: beqz a2, .LBB5_2
-; RV64IZFBFMIN-NEXT: # %bb.1:
-; RV64IZFBFMIN-NEXT: lui a1, 1048568
-; RV64IZFBFMIN-NEXT: or a0, a0, a1
-; RV64IZFBFMIN-NEXT: .LBB5_2:
-; RV64IZFBFMIN-NEXT: fmv.h.x fa5, a0
-; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
-; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
-; RV64IZFBFMIN-NEXT: addi sp, sp, 16
+; RV64IZFBFMIN-NEXT: srli a1, a1, 49
+; RV64IZFBFMIN-NEXT: or a0, a1, a0
+; RV64IZFBFMIN-NEXT: fmv.h.x fa0, a0
; RV64IZFBFMIN-NEXT: ret
%1 = call bfloat @llvm.copysign.bf16(bfloat %a, bfloat %b)
ret bfloat %1
@@ -138,62 +122,36 @@ define i32 @fneg_bf16(bfloat %a, bfloat %b) nounwind {
define bfloat @fsgnjn_bf16(bfloat %a, bfloat %b) nounwind {
; RV32IZFBFMIN-LABEL: fsgnjn_bf16:
; RV32IZFBFMIN: # %bb.0:
-; RV32IZFBFMIN-NEXT: addi sp, sp, -16
; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1
; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0
; RV32IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5
; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
-; RV32IZFBFMIN-NEXT: fmv.x.h a1, fa5
-; RV32IZFBFMIN-NEXT: lui a0, 1048568
-; RV32IZFBFMIN-NEXT: xor a1, a1, a0
-; RV32IZFBFMIN-NEXT: fmv.h.x fa5, a1
-; RV32IZFBFMIN-NEXT: fsh fa5, 12(sp)
-; RV32IZFBFMIN-NEXT: lbu a1, 13(sp)
-; RV32IZFBFMIN-NEXT: fmv.x.h a2, fa0
-; RV32IZFBFMIN-NEXT: slli a2, a2, 17
-; RV32IZFBFMIN-NEXT: andi a3, a1, 128
-; RV32IZFBFMIN-NEXT: srli a1, a2, 17
-; RV32IZFBFMIN-NEXT: bnez a3, .LBB7_2
-; RV32IZFBFMIN-NEXT: # %bb.1:
-; RV32IZFBFMIN-NEXT: fmv.h.x fa5, a1
-; RV32IZFBFMIN-NEXT: j .LBB7_3
-; RV32IZFBFMIN-NEXT: .LBB7_2:
+; RV32IZFBFMIN-NEXT: fmv.x.h a0, fa5
+; RV32IZFBFMIN-NEXT: not a0, a0
+; RV32IZFBFMIN-NEXT: lui a1, 1048568
+; RV32IZFBFMIN-NEXT: and a0, a0, a1
+; RV32IZFBFMIN-NEXT: fmv.x.h a1, fa0
+; RV32IZFBFMIN-NEXT: slli a1, a1, 17
+; RV32IZFBFMIN-NEXT: srli a1, a1, 17
; RV32IZFBFMIN-NEXT: or a0, a1, a0
-; RV32IZFBFMIN-NEXT: fmv.h.x fa5, a0
-; RV32IZFBFMIN-NEXT: .LBB7_3:
-; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
-; RV32IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
-; RV32IZFBFMIN-NEXT: addi sp, sp, 16
+; RV32IZFBFMIN-NEXT: fmv.h.x fa0, a0
; RV32IZFBFMIN-NEXT: ret
;
; RV64IZFBFMIN-LABEL: fsgnjn_bf16:
; RV64IZFBFMIN: # %bb.0:
-; RV64IZFBFMIN-NEXT: addi sp, sp, -16
; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa1
; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0
; RV64IZFBFMIN-NEXT: fadd.s fa5, fa4, fa5
; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa5, fa5
-; RV64IZFBFMIN-NEXT: fmv.x.h a1, fa5
-; RV64IZFBFMIN-NEXT: lui a0, 1048568
-; RV64IZFBFMIN-NEXT: xor a1, a1, a0
-; RV64IZFBFMIN-NEXT: fmv.h.x fa5, a1
-; RV64IZFBFMIN-NEXT: fsh fa5, 8(sp)
-; RV64IZFBFMIN-NEXT: lbu a1, 9(sp)
-; RV64IZFBFMIN-NEXT: fmv.x.h a2, fa0
-; RV64IZFBFMIN-NEXT: slli a2, a2, 49
-; RV64IZFBFMIN-NEXT: andi a3, a1, 128
-; RV64IZFBFMIN-NEXT: srli a1, a2, 49
-; RV64IZFBFMIN-NEXT: bnez a3, .LBB7_2
-; RV64IZFBFMIN-NEXT: # %bb.1:
-; RV64IZFBFMIN-NEXT: fmv.h.x fa5, a1
-; RV64IZFBFMIN-NEXT: j .LBB7_3
-; RV64IZFBFMIN-NEXT: .LBB7_2:
+; RV64IZFBFMIN-NEXT: fmv.x.h a0, fa5
+; RV64IZFBFMIN-NEXT: not a0, a0
+; RV64IZFBFMIN-NEXT: lui a1, 1048568
+; RV64IZFBFMIN-NEXT: and a0, a0, a1
+; RV64IZFBFMIN-NEXT: fmv.x.h a1, fa0
+; RV64IZFBFMIN-NEXT: slli a1, a1, 49
+; RV64IZFBFMIN-NEXT: srli a1, a1, 49
; RV64IZFBFMIN-NEXT: or a0, a1, a0
-; RV64IZFBFMIN-NEXT: fmv.h.x fa5, a0
-; RV64IZFBFMIN-NEXT: .LBB7_3:
-; RV64IZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa5
-; RV64IZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
-; RV64IZFBFMIN-NEXT: addi sp, sp, 16
+; RV64IZFBFMIN-NEXT: fmv.h.x fa0, a0
; RV64IZFBFMIN-NEXT: ret
%1 = fadd bfloat %a, %b
%2 = fneg bfloat %1
diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll
index 12c35b87dc6771..d8019c0ad61123 100644
--- a/llvm/test/CodeGen/RISCV/copysign-casts.ll
+++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll
@@ -503,54 +503,38 @@ define half @fold_demote_h_s(half %a, float %b) nounwind {
;
; RV32IFZFHMIN-LABEL: fold_demote_h_s:
; RV32IFZFHMIN: # %bb.0:
-; RV32IFZFHMIN-NEXT: fmv.x.h a0, fa0
-; RV32IFZFHMIN-NEXT: slli a0, a0, 17
-; RV32IFZFHMIN-NEXT: fmv.x.w a1, fa1
-; RV32IFZFHMIN-NEXT: srli a0, a0, 17
-; RV32IFZFHMIN-NEXT: bgez a1, .LBB4_2
-; RV32IFZFHMIN-NEXT: # %bb.1:
-; RV32IFZFHMIN-NEXT: lui a1, 1048568
-; RV32IFZFHMIN-NEXT: or a0, a0, a1
-; RV32IFZFHMIN-NEXT: .LBB4_2:
-; RV32IFZFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32IFZFHMIN-NEXT: fcvt.s.h fa5, fa5
-; RV32IFZFHMIN-NEXT: fcvt.h.s fa0, fa5
+; RV32IFZFHMIN-NEXT: fmv.x.w a0, fa1
+; RV32IFZFHMIN-NEXT: srli a0, a0, 31
+; RV32IFZFHMIN-NEXT: slli a0, a0, 15
+; RV32IFZFHMIN-NEXT: fmv.x.h a1, fa0
+; RV32IFZFHMIN-NEXT: slli a1, a1, 17
+; RV32IFZFHMIN-NEXT: srli a1, a1, 17
+; RV32IFZFHMIN-NEXT: or a0, a1, a0
+; RV32IFZFHMIN-NEXT: fmv.h.x fa0, a0
; RV32IFZFHMIN-NEXT: ret
;
; RV32IFDZFHMIN-LABEL: fold_demote_h_s:
; RV32IFDZFHMIN: # %bb.0:
-; RV32IFDZFHMIN-NEXT: fmv.x.h a0, fa0
-; RV32IFDZFHMIN-NEXT: slli a0, a0, 17
-; RV32IFDZFHMIN-NEXT: fmv.x.w a1, fa1
-; RV32IFDZFHMIN-NEXT: srli a0, a0, 17
-; RV32IFDZFHMIN-NEXT: bgez a1, .LBB4_2
-; RV32IFDZFHMIN-NEXT: # %bb.1:
-; RV32IFDZFHMIN-NEXT: lui a1, 1048568
-; RV32IFDZFHMIN-NEXT: or a0, a0, a1
-; RV32IFDZFHMIN-NEXT: .LBB4_2:
-; RV32IFDZFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32IFDZFHMIN-NEXT: fcvt.s.h fa5, fa5
-; RV32IFDZFHMIN-NEXT: fcvt.h.s fa0, fa5
+; RV32IFDZFHMIN-NEXT: fmv.x.w a0, fa1
+; RV32IFDZFHMIN-NEXT: srli a0, a0, 31
+; RV32IFDZFHMIN-NEXT: slli a0, a0, 15
+; RV32IFDZFHMIN-NEXT: fmv.x.h a1, fa0
+; RV32IFDZFHMIN-NEXT: slli a1, a1, 17
+; RV32IFDZFHMIN-NEXT: srli a1, a1, 17
+; RV32IFDZFHMIN-NEXT: or a0, a1, a0
+; RV32IFDZFHMIN-NEXT: fmv.h.x fa0, a0
; RV32IFDZFHMIN-NEXT: ret
;
; RV64IFDZFHMIN-LABEL: fold_demote_h_s:
; RV64IFDZFHMIN: # %bb.0:
-; RV64IFDZFHMIN-NEXT: addi sp, sp, -16
-; RV64IFDZFHMIN-NEXT: fsw fa1, 8(sp)
-; RV64IFDZFHMIN-NEXT: lbu a0, 11(sp)
+; RV64IFDZFHMIN-NEXT: fmv.x.w a0, fa1
+; RV64IFDZFHMIN-NEXT: srli a0, a0, 31
+; RV64IFDZFHMIN-NEXT: slli a0, a0, 15
; RV64IFDZFHMIN-NEXT: fmv.x.h a1, fa0
; RV64IFDZFHMIN-NEXT: slli a1, a1, 49
-; RV64IFDZFHMIN-NEXT: andi a2, a0, 128
-; RV64IFDZFHMIN-NEXT: srli a0, a1, 49
-; RV64IFDZFHMIN-NEXT: beqz a2, .LBB4_2
-; RV64IFDZFHMIN-NEXT: # %bb.1:
-; RV64IFDZFHMIN-NEXT: lui a1, 1048568
-; RV64IFDZFHMIN-NEXT: or a0, a0, a1
-; RV64IFDZFHMIN-NEXT: .LBB4_2:
-; RV64IFDZFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64IFDZFHMIN-NEXT: fcvt.s.h fa5, fa5
-; RV64IFDZFHMIN-NEXT: fcvt.h.s fa0, fa5
-; RV64IFDZFHMIN-NEXT: addi sp, sp, 16
+; RV64IFDZFHMIN-NEXT: srli a1, a1, 49
+; RV64IFDZFHMIN-NEXT: or a0, a1, a0
+; RV64IFDZFHMIN-NEXT: fmv.h.x fa0, a0
; RV64IFDZFHMIN-NEXT: ret
%c = fptrunc float %b to half
%t = call half @llvm.copysign.f16(half %a, half %c)
@@ -646,60 +630,40 @@ define half @fold_demote_h_d(half %a, double %b) nounwind {
;
; RV32IFZFHMIN-LABEL: fold_demote_h_d:
; RV32IFZFHMIN: # %bb.0:
-; RV32IFZFHMIN-NEXT: addi sp, sp, -16
-; RV32IFZFHMIN-NEXT: srli a1, a1, 16
-; RV32IFZFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32IFZFHMIN-NEXT: fsh fa5, 12(sp)
-; RV32IFZFHMIN-NEXT: lbu a0, 13(sp)
-; RV32IFZFHMIN-NEXT: fmv.x.h a1, fa0
-; RV32IFZFHMIN-NEXT: slli a1, a1, 17
-; RV32IFZFHMIN-NEXT: andi a2, a0, 128
-; RV32IFZFHMIN-NEXT: srli a0, a1, 17
-; RV32IFZFHMIN-NEXT: beqz a2, .LBB5_2
-; RV32IFZFHMIN-NEXT: # %bb.1:
-; RV32IFZFHMIN-NEXT: lui a1, 1048568
+; RV32IFZFHMIN-NEXT: srli a1, a1, 31
+; RV32IFZFHMIN-NEXT: slli a1, a1, 15
+; RV32IFZFHMIN-NEXT: fmv.x.h a0, fa0
+; RV32IFZFHMIN-NEXT: slli a0, a0, 17
+; RV32IFZFHMIN-NEXT: srli a0, a0, 17
; RV32IFZFHMIN-NEXT: or a0, a0, a1
-; RV32IFZFHMIN-NEXT: .LBB5_2:
-; RV32IFZFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32IFZFHMIN-NEXT: fcvt.s.h fa5, fa5
-; RV32IFZFHMIN-NEXT: fcvt.h.s fa0, fa5
-; RV32IFZFHMIN-NEXT: addi sp, sp, 16
+; RV32IFZFHMIN-NEXT: fmv.h.x fa0, a0
; RV32IFZFHMIN-NEXT: ret
;
; RV32IFDZFHMIN-LABEL: fold_demote_h_d:
; RV32IFDZFHMIN: # %bb.0:
; RV32IFDZFHMIN-NEXT: addi sp, sp, -16
; RV32IFDZFHMIN-NEXT: fsd fa1, 8(sp)
-; RV32IFDZFHMIN-NEXT: lbu a0, 15(sp)
+; RV32IFDZFHMIN-NEXT: lw a0, 12(sp)
+; RV32IFDZFHMIN-NEXT: srli a0, a0, 31
+; RV32IFDZFHMIN-NEXT: slli a0, a0, 15
; RV32IFDZFHMIN-NEXT: fmv.x.h a1, fa0
; RV32IFDZFHMIN-NEXT: slli a1, a1, 17
-; RV32IFDZFHMIN-NEXT: andi a2, a0, 128
-; RV32IFDZFHMIN-NEXT: srli a0, a1, 17
-; RV32IFDZFHMIN-NEXT: beqz a2, .LBB5_2
-; RV32IFDZFHMIN-NEXT: # %bb.1:
-; RV32IFDZFHMIN-NEXT: lui a1, 1048568
-; RV32IFDZFHMIN-NEXT: or a0, a0, a1
-; RV32IFDZFHMIN-NEXT: .LBB5_2:
-; RV32IFDZFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32IFDZFHMIN-NEXT: fcvt.s.h fa5, fa5
-; RV32IFDZFHMIN-NEXT: fcvt.h.s fa0, fa5
+; RV32IFDZFHMIN-NEXT: srli a1, a1, 17
+; RV32IFDZFHMIN-NEXT: or a0, a1, a0
+; RV32IFDZFHMIN-NEXT: fmv.h.x fa0, a0
; RV32IFDZFHMIN-NEXT: addi sp, sp, 16
; RV32IFDZFHMIN-NEXT: ret
;
; RV64IFDZFHMIN-LABEL: fold_demote_h_d:
; RV64IFDZFHMIN: # %bb.0:
-; RV64IFDZFHMIN-NEXT: fmv.x.h a0, fa0
-; RV64IFDZFHMIN-NEXT: slli a0, a0, 49
-; RV64IFDZFHMIN-NEXT: fmv.x.d a1, fa1
-; RV64IFDZFHMIN-NEXT: srli a0, a0, 49
-; RV64IFDZFHMIN-NEXT: bgez a1, .LBB5_2
-; RV64IFDZFHMIN-NEXT: # %bb.1:
-; RV64IFDZFHMIN-NEXT: lui a1, 1048568
-; RV64IFDZFHMIN-NEXT: or a0, a0, a1
-; RV64IFDZFHMIN-NEXT: .LBB5_2:
-; RV64IFDZFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64IFDZFHMIN-NEXT: fcvt.s.h fa5, fa5
-; RV64IFDZFHMIN-NEXT: fcvt.h.s fa0, fa5
+; RV64IFDZFHMIN-NEXT: fmv.x.d a0, fa1
+; RV64IFDZFHMIN-NEXT: srli a0, a0, 63
+; RV64IFDZFHMIN-NEXT: slli a0, a0, 15
+; RV64IFDZFHMIN-NEXT: fmv.x.h a1, fa0
+; RV64IFDZFHMIN-NEXT: slli a1, a1, 49
+; RV64IFDZFHMIN-NEXT: srli a1, a1, 49
+; RV64IFDZFHMIN-NEXT: or a0, a1, a0
+; RV64IFDZFHMIN-NEXT: fmv.h.x fa0, a0
; RV64IFDZFHMIN-NEXT: ret
%c = fptrunc double %b to half
%t = call half @llvm.copysign.f16(half %a, half %c)
diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll
index 4c64e6c7a20906..f8522b09970bf9 100644
--- a/llvm/test/CodeGen/RISCV/half-arith.ll
+++ b/llvm/test/CodeGen/RISCV/half-arith.ll
@@ -442,78 +442,44 @@ define half @fsgnj_h(half %a, half %b) nounwind {
;
; RV32IZFHMIN-LABEL: fsgnj_h:
; RV32IZFHMIN: # %bb.0:
-; RV32IZFHMIN-NEXT: addi sp, sp, -16
-; RV32IZFHMIN-NEXT: fsh fa1, 12(sp)
-; RV32IZFHMIN-NEXT: lbu a0, 13(sp)
+; RV32IZFHMIN-NEXT: fmv.x.h a0, fa1
+; RV32IZFHMIN-NEXT: lui a1, 1048568
+; RV32IZFHMIN-NEXT: and a0, a0, a1
; RV32IZFHMIN-NEXT: fmv.x.h a1, fa0
; RV32IZFHMIN-NEXT: slli a1, a1, 17
-; RV32IZFHMIN-NEXT: andi a2, a0, 128
-; RV32IZFHMIN-NEXT: srli a0, a1, 17
-; RV32IZFHMIN-NEXT: beqz a2, .LBB5_2
-; RV32IZFHMIN-NEXT: # %bb.1:
-; RV32IZFHMIN-NEXT: lui a1, 1048568
-; RV32IZFHMIN-NEXT: or a0, a0, a1
-; RV32IZFHMIN-NEXT: .LBB5_2:
-; RV32IZFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32IZFHMIN-NEXT: fcvt.s.h fa5, fa5
-; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa5
-; RV32IZFHMIN-NEXT: addi sp, sp, 16
+; RV32IZFHMIN-NEXT: srli a1, a1, 17
+; RV32IZFHMIN-NEXT: or a0, a1, a0
+; RV32IZFHMIN-NEXT: fmv.h.x fa0, a0
; RV32IZFHMIN-NEXT: ret
;
; RV64IZFHMIN-LABEL: fsgnj_h:
; RV64IZFHMIN: # %bb.0:
-; RV64IZFHMIN-NEXT: addi sp, sp, -16
-; RV64IZFHMIN-NEXT: fsh fa1, 8(sp)
-; RV64IZFHMIN-NEXT: lbu a0, 9(sp)
+; RV64IZFHMIN-NEXT: fmv.x.h a0, fa1
+; RV64IZFHMIN-NEXT: lui a1, 1048568
+; RV64IZFHMIN-NEXT: and a0, a0, a1
; RV64IZFHMIN-NEXT: fmv.x.h a1, fa0
; RV64IZFHMIN-NEXT: slli a1, a1, 49
-; RV64IZFHMIN-NEXT: andi a2, a0, 128
-; RV64IZFHMIN-NEXT: srli a0, a1, 49
-; RV64IZFHMIN-NEXT: beqz a2, .LBB5_2
-; RV64IZFHMIN-NEXT: # %bb.1:
-; RV64IZFHMIN-NEXT: lui a1, 1048568
-; RV64IZFHMIN-NEXT: or a0, a0, a1
-; RV64IZFHMIN-NEXT: .LBB5_2:
-; RV64IZFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64IZFHMIN-NEXT: fcvt.s.h fa5, fa5
-; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa5
-; RV64IZFHMIN-NEXT: addi sp, sp, 16
+; RV64IZFHMIN-NEXT: srli a1, a1, 49
+; RV64IZFHMIN-NEXT: or a0, a1, a0
+; RV64IZFHMIN-NEXT: fmv.h.x fa0, a0
; RV64IZFHMIN-NEXT: ret
;
; RV32IZHINXMIN-LABEL: fsgnj_h:
; RV32IZHINXMIN: # %bb.0:
-; RV32IZHINXMIN-NEXT: addi sp, sp, -16
-; RV32IZHINXMIN-NEXT: sh a1, 12(sp)
-; RV32IZHINXMIN-NEXT: lbu a1, 13(sp)
+; RV32IZHINXMIN-NEXT: lui a2, 1048568
+; RV32IZHINXMIN-NEXT: and a1, a1, a2
; RV32IZHINXMIN-NEXT: slli a0, a0, 17
-; RV32IZHINXMIN-NEXT: andi a1, a1, 128
; RV32IZHINXMIN-NEXT: srli a0, a0, 17
-; RV32IZHINXMIN-NEXT: beqz a1, .LBB5_2
-; RV32IZHINXMIN-NEXT: # %bb.1:
-; RV32IZHINXMIN-NEXT: lui a1, 1048568
; RV32IZHINXMIN-NEXT: or a0, a0, a1
-; RV32IZHINXMIN-NEXT: .LBB5_2:
-; RV32IZHINXMIN-NEXT: fcvt.s.h a0, a0
-; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0
-; RV32IZHINXMIN-NEXT: addi sp, sp, 16
; RV32IZHINXMIN-NEXT: ret
;
; RV64IZHINXMIN-LABEL: fsgnj_h:
; RV64IZHINXMIN: # %bb.0:
-; RV64IZHINXMIN-NEXT: addi sp, sp, -16
-; RV64IZHINXMIN-NEXT: sh a1, 8(sp)
-; RV64IZHINXMIN-NEXT: lbu a1, 9(sp)
+; RV64IZHINXMIN-NEXT: lui a2, 1048568
+; RV64IZHINXMIN-NEXT: and a1, a1, a2
; RV64IZHINXMIN-NEXT: slli a0, a0, 49
-; RV64IZHINXMIN-NEXT: andi a1, a1, 128
; RV64IZHINXMIN-NEXT: srli a0, a0, 49
-; RV64IZHINXMIN-NEXT: beqz a1, .LBB5_2
-; RV64IZHINXMIN-NEXT: # %bb.1:
-; RV64IZHINXMIN-NEXT: lui a1, 1048568
; RV64IZHINXMIN-NEXT: or a0, a0, a1
-; RV64IZHINXMIN-NEXT: .LBB5_2:
-; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0
-; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0
-; RV64IZHINXMIN-NEXT: addi sp, sp, 16
; RV64IZHINXMIN-NEXT: ret
%1 = call half @llvm.copysign.f16(half %a, half %b)
ret half %1
@@ -725,108 +691,64 @@ define half @fsgnjn_h(half %a, half %b) nounwind {
;
; RV32IZFHMIN-LABEL: fsgnjn_h:
; RV32IZFHMIN: # %bb.0:
-; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: fcvt.s.h fa5, fa1
; RV32IZFHMIN-NEXT: fcvt.s.h fa4, fa0
; RV32IZ...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thank you!
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/66/builds/3400. The relevant piece of the build log was attached for reference.
|
The LegalizeDAG expansion will go through memory since i16 isn't a legal type. Avoid this by using FMV nodes.
Similar to what we did for #106886 for FNEG and FABS. Special care is needed to handle the Sign operand being a different type.