Skip to content

[RISCV] Custom legalize f16/bf16 FCOPYSIGN with Zfhmin/Zbfmin. #107039

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 3, 2024

Conversation

topperc
Copy link
Collaborator

@topperc topperc commented Sep 3, 2024

The LegalizeDAG expansion will go through memory since i16 isn't a legal type. Avoid this by using FMV nodes.

Similar to what we did for #106886 for FNEG and FABS. Special care is needed to handle the Sign operand being a different type.

The LegalizeDAG expansion will go through memory since i16 isn't a legal
type. Avoid this by using FMV nodes.

Similar to what we did for llvm#106886 for FNEG and FABS. Special care
is needed to handle the Sign operand being a different type.
@llvmbot
Copy link
Member

llvmbot commented Sep 3, 2024

@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

Changes

The LegalizeDAG expansion will go through memory since i16 isn't a legal type. Avoid this by using FMV nodes.

Similar to what we did for #106886 for FNEG and FABS. Special care is needed to handle the Sign operand being a different type.


Patch is 37.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/107039.diff

6 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+67-2)
  • (modified) llvm/test/CodeGen/RISCV/bfloat-arith.ll (+28-70)
  • (modified) llvm/test/CodeGen/RISCV/copysign-casts.ll (+42-78)
  • (modified) llvm/test/CodeGen/RISCV/half-arith.ll (+70-176)
  • (modified) llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll (+12-54)
  • (modified) llvm/test/CodeGen/RISCV/half-intrinsics.ll (+16-50)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 250d1c60b9f59e..f50d378ed97aa6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -459,7 +459,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FREM, MVT::bf16, Promote);
     setOperationAction(ISD::FABS, MVT::bf16, Custom);
     setOperationAction(ISD::FNEG, MVT::bf16, Custom);
-    setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
+    setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Custom);
   }
 
   if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
@@ -477,7 +477,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         setOperationAction(Op, MVT::f16, Custom);
       setOperationAction(ISD::FABS, MVT::f16, Custom);
       setOperationAction(ISD::FNEG, MVT::f16, Custom);
-      setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
+      setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
     }
 
     setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
@@ -5964,6 +5964,69 @@ static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
   return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
 }
 
+static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
+                              const RISCVSubtarget &Subtarget) {
+  assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
+
+  MVT XLenVT = Subtarget.getXLenVT();
+  MVT VT = Op.getSimpleValueType();
+  assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
+
+  SDValue Mag = Op.getOperand(0);
+  SDValue Sign = Op.getOperand(1);
+
+  SDLoc DL(Op);
+
+  // Get sign bit into an integer value.
+  SDValue SignAsInt;
+  unsigned SignSize = Sign.getValueSizeInBits();
+  if (SignSize == Subtarget.getXLen()) {
+    SignAsInt = DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
+  } else if (SignSize == 16) {
+    SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
+  } else if (SignSize == 32) {
+    SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
+  } else if (SignSize == 64) {
+    assert(XLenVT == MVT::i32 && "Unexpected type");
+    // Copy the upper word to integer.
+    SignAsInt = DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
+                    .getValue(1);
+    SignSize = 32;
+  } else
+    llvm_unreachable("Unexpected sign size");
+
+  // Get the signbit at the right position for MagAsInt.
+  int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits();
+  if (ShiftAmount > 0) {
+    SignAsInt = DAG.getNode(ISD::SRL, DL, XLenVT, SignAsInt,
+                            DAG.getConstant(ShiftAmount, DL, XLenVT));
+  } else if (ShiftAmount < 0) {
+    SignAsInt = DAG.getNode(ISD::SHL, DL, XLenVT, SignAsInt,
+                            DAG.getConstant(-ShiftAmount, DL, XLenVT));
+  }
+
+  // Mask the sign bit and any bits above it. The extra bits will be dropped
+  // when we convert back to FP.
+  SDValue SignMask = DAG.getConstant(
+      APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
+  SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
+
+  // Transform Mag value to integer, and clear the sign bit.
+  SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
+  SDValue ClearSignMask = DAG.getConstant(
+      APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
+  SDValue ClearedSign =
+      DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
+
+  SDNodeFlags Flags;
+  Flags.setDisjoint(true);
+
+  SDValue CopiedSign =
+      DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit, Flags);
+
+  return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
+}
+
 /// Get a RISC-V target specified VL op for a given SDNode.
 static unsigned getRISCVVLOp(SDValue Op) {
 #define OP_CASE(NODE)                                                          \
@@ -7164,6 +7227,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   case ISD::VSELECT:
     return lowerFixedLengthVectorSelectToRVV(Op, DAG);
   case ISD::FCOPYSIGN:
+    if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
+      return lowerFCOPYSIGN(Op, DAG, Subtarget);
     if (Op.getValueType() == MVT::nxv32f16 &&
         (Subtarget.hasVInstructionsF16Minimal() &&
          !Subtarget.hasVInstructionsF16()))
diff --git a/llvm/test/CodeGen/RISCV/bfloat-arith.ll b/llvm/test/CodeGen/RISCV/bfloat-arith.ll
index 965d7e0132e60b..b688af4234e65a 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-arith.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-arith.ll
@@ -73,42 +73,26 @@ declare bfloat @llvm.copysign.bf16(bfloat, bfloat)
 define bfloat @fsgnj_bf16(bfloat %a, bfloat %b) nounwind {
 ; RV32IZFBFMIN-LABEL: fsgnj_bf16:
 ; RV32IZFBFMIN:       # %bb.0:
-; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
-; RV32IZFBFMIN-NEXT:    fsh fa1, 12(sp)
-; RV32IZFBFMIN-NEXT:    lbu a0, 13(sp)
+; RV32IZFBFMIN-NEXT:    fmv.x.h a0, fa1
+; RV32IZFBFMIN-NEXT:    lui a1, 1048568
+; RV32IZFBFMIN-NEXT:    and a0, a0, a1
 ; RV32IZFBFMIN-NEXT:    fmv.x.h a1, fa0
 ; RV32IZFBFMIN-NEXT:    slli a1, a1, 17
-; RV32IZFBFMIN-NEXT:    andi a2, a0, 128
-; RV32IZFBFMIN-NEXT:    srli a0, a1, 17
-; RV32IZFBFMIN-NEXT:    beqz a2, .LBB5_2
-; RV32IZFBFMIN-NEXT:  # %bb.1:
-; RV32IZFBFMIN-NEXT:    lui a1, 1048568
-; RV32IZFBFMIN-NEXT:    or a0, a0, a1
-; RV32IZFBFMIN-NEXT:  .LBB5_2:
-; RV32IZFBFMIN-NEXT:    fmv.h.x fa5, a0
-; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
-; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
-; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV32IZFBFMIN-NEXT:    srli a1, a1, 17
+; RV32IZFBFMIN-NEXT:    or a0, a1, a0
+; RV32IZFBFMIN-NEXT:    fmv.h.x fa0, a0
 ; RV32IZFBFMIN-NEXT:    ret
 ;
 ; RV64IZFBFMIN-LABEL: fsgnj_bf16:
 ; RV64IZFBFMIN:       # %bb.0:
-; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
-; RV64IZFBFMIN-NEXT:    fsh fa1, 8(sp)
-; RV64IZFBFMIN-NEXT:    lbu a0, 9(sp)
+; RV64IZFBFMIN-NEXT:    fmv.x.h a0, fa1
+; RV64IZFBFMIN-NEXT:    lui a1, 1048568
+; RV64IZFBFMIN-NEXT:    and a0, a0, a1
 ; RV64IZFBFMIN-NEXT:    fmv.x.h a1, fa0
 ; RV64IZFBFMIN-NEXT:    slli a1, a1, 49
-; RV64IZFBFMIN-NEXT:    andi a2, a0, 128
-; RV64IZFBFMIN-NEXT:    srli a0, a1, 49
-; RV64IZFBFMIN-NEXT:    beqz a2, .LBB5_2
-; RV64IZFBFMIN-NEXT:  # %bb.1:
-; RV64IZFBFMIN-NEXT:    lui a1, 1048568
-; RV64IZFBFMIN-NEXT:    or a0, a0, a1
-; RV64IZFBFMIN-NEXT:  .LBB5_2:
-; RV64IZFBFMIN-NEXT:    fmv.h.x fa5, a0
-; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
-; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
-; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV64IZFBFMIN-NEXT:    srli a1, a1, 49
+; RV64IZFBFMIN-NEXT:    or a0, a1, a0
+; RV64IZFBFMIN-NEXT:    fmv.h.x fa0, a0
 ; RV64IZFBFMIN-NEXT:    ret
   %1 = call bfloat @llvm.copysign.bf16(bfloat %a, bfloat %b)
   ret bfloat %1
@@ -138,62 +122,36 @@ define i32 @fneg_bf16(bfloat %a, bfloat %b) nounwind {
 define bfloat @fsgnjn_bf16(bfloat %a, bfloat %b) nounwind {
 ; RV32IZFBFMIN-LABEL: fsgnjn_bf16:
 ; RV32IZFBFMIN:       # %bb.0:
-; RV32IZFBFMIN-NEXT:    addi sp, sp, -16
 ; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
 ; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
 ; RV32IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
 ; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
-; RV32IZFBFMIN-NEXT:    fmv.x.h a1, fa5
-; RV32IZFBFMIN-NEXT:    lui a0, 1048568
-; RV32IZFBFMIN-NEXT:    xor a1, a1, a0
-; RV32IZFBFMIN-NEXT:    fmv.h.x fa5, a1
-; RV32IZFBFMIN-NEXT:    fsh fa5, 12(sp)
-; RV32IZFBFMIN-NEXT:    lbu a1, 13(sp)
-; RV32IZFBFMIN-NEXT:    fmv.x.h a2, fa0
-; RV32IZFBFMIN-NEXT:    slli a2, a2, 17
-; RV32IZFBFMIN-NEXT:    andi a3, a1, 128
-; RV32IZFBFMIN-NEXT:    srli a1, a2, 17
-; RV32IZFBFMIN-NEXT:    bnez a3, .LBB7_2
-; RV32IZFBFMIN-NEXT:  # %bb.1:
-; RV32IZFBFMIN-NEXT:    fmv.h.x fa5, a1
-; RV32IZFBFMIN-NEXT:    j .LBB7_3
-; RV32IZFBFMIN-NEXT:  .LBB7_2:
+; RV32IZFBFMIN-NEXT:    fmv.x.h a0, fa5
+; RV32IZFBFMIN-NEXT:    not a0, a0
+; RV32IZFBFMIN-NEXT:    lui a1, 1048568
+; RV32IZFBFMIN-NEXT:    and a0, a0, a1
+; RV32IZFBFMIN-NEXT:    fmv.x.h a1, fa0
+; RV32IZFBFMIN-NEXT:    slli a1, a1, 17
+; RV32IZFBFMIN-NEXT:    srli a1, a1, 17
 ; RV32IZFBFMIN-NEXT:    or a0, a1, a0
-; RV32IZFBFMIN-NEXT:    fmv.h.x fa5, a0
-; RV32IZFBFMIN-NEXT:  .LBB7_3:
-; RV32IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
-; RV32IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
-; RV32IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV32IZFBFMIN-NEXT:    fmv.h.x fa0, a0
 ; RV32IZFBFMIN-NEXT:    ret
 ;
 ; RV64IZFBFMIN-LABEL: fsgnjn_bf16:
 ; RV64IZFBFMIN:       # %bb.0:
-; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
 ; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa1
 ; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
 ; RV64IZFBFMIN-NEXT:    fadd.s fa5, fa4, fa5
 ; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
-; RV64IZFBFMIN-NEXT:    fmv.x.h a1, fa5
-; RV64IZFBFMIN-NEXT:    lui a0, 1048568
-; RV64IZFBFMIN-NEXT:    xor a1, a1, a0
-; RV64IZFBFMIN-NEXT:    fmv.h.x fa5, a1
-; RV64IZFBFMIN-NEXT:    fsh fa5, 8(sp)
-; RV64IZFBFMIN-NEXT:    lbu a1, 9(sp)
-; RV64IZFBFMIN-NEXT:    fmv.x.h a2, fa0
-; RV64IZFBFMIN-NEXT:    slli a2, a2, 49
-; RV64IZFBFMIN-NEXT:    andi a3, a1, 128
-; RV64IZFBFMIN-NEXT:    srli a1, a2, 49
-; RV64IZFBFMIN-NEXT:    bnez a3, .LBB7_2
-; RV64IZFBFMIN-NEXT:  # %bb.1:
-; RV64IZFBFMIN-NEXT:    fmv.h.x fa5, a1
-; RV64IZFBFMIN-NEXT:    j .LBB7_3
-; RV64IZFBFMIN-NEXT:  .LBB7_2:
+; RV64IZFBFMIN-NEXT:    fmv.x.h a0, fa5
+; RV64IZFBFMIN-NEXT:    not a0, a0
+; RV64IZFBFMIN-NEXT:    lui a1, 1048568
+; RV64IZFBFMIN-NEXT:    and a0, a0, a1
+; RV64IZFBFMIN-NEXT:    fmv.x.h a1, fa0
+; RV64IZFBFMIN-NEXT:    slli a1, a1, 49
+; RV64IZFBFMIN-NEXT:    srli a1, a1, 49
 ; RV64IZFBFMIN-NEXT:    or a0, a1, a0
-; RV64IZFBFMIN-NEXT:    fmv.h.x fa5, a0
-; RV64IZFBFMIN-NEXT:  .LBB7_3:
-; RV64IZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa5
-; RV64IZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
-; RV64IZFBFMIN-NEXT:    addi sp, sp, 16
+; RV64IZFBFMIN-NEXT:    fmv.h.x fa0, a0
 ; RV64IZFBFMIN-NEXT:    ret
   %1 = fadd bfloat %a, %b
   %2 = fneg bfloat %1
diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll
index 12c35b87dc6771..d8019c0ad61123 100644
--- a/llvm/test/CodeGen/RISCV/copysign-casts.ll
+++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll
@@ -503,54 +503,38 @@ define half @fold_demote_h_s(half %a, float %b) nounwind {
 ;
 ; RV32IFZFHMIN-LABEL: fold_demote_h_s:
 ; RV32IFZFHMIN:       # %bb.0:
-; RV32IFZFHMIN-NEXT:    fmv.x.h a0, fa0
-; RV32IFZFHMIN-NEXT:    slli a0, a0, 17
-; RV32IFZFHMIN-NEXT:    fmv.x.w a1, fa1
-; RV32IFZFHMIN-NEXT:    srli a0, a0, 17
-; RV32IFZFHMIN-NEXT:    bgez a1, .LBB4_2
-; RV32IFZFHMIN-NEXT:  # %bb.1:
-; RV32IFZFHMIN-NEXT:    lui a1, 1048568
-; RV32IFZFHMIN-NEXT:    or a0, a0, a1
-; RV32IFZFHMIN-NEXT:  .LBB4_2:
-; RV32IFZFHMIN-NEXT:    fmv.h.x fa5, a0
-; RV32IFZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; RV32IFZFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; RV32IFZFHMIN-NEXT:    fmv.x.w a0, fa1
+; RV32IFZFHMIN-NEXT:    srli a0, a0, 31
+; RV32IFZFHMIN-NEXT:    slli a0, a0, 15
+; RV32IFZFHMIN-NEXT:    fmv.x.h a1, fa0
+; RV32IFZFHMIN-NEXT:    slli a1, a1, 17
+; RV32IFZFHMIN-NEXT:    srli a1, a1, 17
+; RV32IFZFHMIN-NEXT:    or a0, a1, a0
+; RV32IFZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; RV32IFZFHMIN-NEXT:    ret
 ;
 ; RV32IFDZFHMIN-LABEL: fold_demote_h_s:
 ; RV32IFDZFHMIN:       # %bb.0:
-; RV32IFDZFHMIN-NEXT:    fmv.x.h a0, fa0
-; RV32IFDZFHMIN-NEXT:    slli a0, a0, 17
-; RV32IFDZFHMIN-NEXT:    fmv.x.w a1, fa1
-; RV32IFDZFHMIN-NEXT:    srli a0, a0, 17
-; RV32IFDZFHMIN-NEXT:    bgez a1, .LBB4_2
-; RV32IFDZFHMIN-NEXT:  # %bb.1:
-; RV32IFDZFHMIN-NEXT:    lui a1, 1048568
-; RV32IFDZFHMIN-NEXT:    or a0, a0, a1
-; RV32IFDZFHMIN-NEXT:  .LBB4_2:
-; RV32IFDZFHMIN-NEXT:    fmv.h.x fa5, a0
-; RV32IFDZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; RV32IFDZFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; RV32IFDZFHMIN-NEXT:    fmv.x.w a0, fa1
+; RV32IFDZFHMIN-NEXT:    srli a0, a0, 31
+; RV32IFDZFHMIN-NEXT:    slli a0, a0, 15
+; RV32IFDZFHMIN-NEXT:    fmv.x.h a1, fa0
+; RV32IFDZFHMIN-NEXT:    slli a1, a1, 17
+; RV32IFDZFHMIN-NEXT:    srli a1, a1, 17
+; RV32IFDZFHMIN-NEXT:    or a0, a1, a0
+; RV32IFDZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; RV32IFDZFHMIN-NEXT:    ret
 ;
 ; RV64IFDZFHMIN-LABEL: fold_demote_h_s:
 ; RV64IFDZFHMIN:       # %bb.0:
-; RV64IFDZFHMIN-NEXT:    addi sp, sp, -16
-; RV64IFDZFHMIN-NEXT:    fsw fa1, 8(sp)
-; RV64IFDZFHMIN-NEXT:    lbu a0, 11(sp)
+; RV64IFDZFHMIN-NEXT:    fmv.x.w a0, fa1
+; RV64IFDZFHMIN-NEXT:    srli a0, a0, 31
+; RV64IFDZFHMIN-NEXT:    slli a0, a0, 15
 ; RV64IFDZFHMIN-NEXT:    fmv.x.h a1, fa0
 ; RV64IFDZFHMIN-NEXT:    slli a1, a1, 49
-; RV64IFDZFHMIN-NEXT:    andi a2, a0, 128
-; RV64IFDZFHMIN-NEXT:    srli a0, a1, 49
-; RV64IFDZFHMIN-NEXT:    beqz a2, .LBB4_2
-; RV64IFDZFHMIN-NEXT:  # %bb.1:
-; RV64IFDZFHMIN-NEXT:    lui a1, 1048568
-; RV64IFDZFHMIN-NEXT:    or a0, a0, a1
-; RV64IFDZFHMIN-NEXT:  .LBB4_2:
-; RV64IFDZFHMIN-NEXT:    fmv.h.x fa5, a0
-; RV64IFDZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; RV64IFDZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; RV64IFDZFHMIN-NEXT:    addi sp, sp, 16
+; RV64IFDZFHMIN-NEXT:    srli a1, a1, 49
+; RV64IFDZFHMIN-NEXT:    or a0, a1, a0
+; RV64IFDZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; RV64IFDZFHMIN-NEXT:    ret
   %c = fptrunc float %b to half
   %t = call half @llvm.copysign.f16(half %a, half %c)
@@ -646,60 +630,40 @@ define half @fold_demote_h_d(half %a, double %b) nounwind {
 ;
 ; RV32IFZFHMIN-LABEL: fold_demote_h_d:
 ; RV32IFZFHMIN:       # %bb.0:
-; RV32IFZFHMIN-NEXT:    addi sp, sp, -16
-; RV32IFZFHMIN-NEXT:    srli a1, a1, 16
-; RV32IFZFHMIN-NEXT:    fmv.h.x fa5, a1
-; RV32IFZFHMIN-NEXT:    fsh fa5, 12(sp)
-; RV32IFZFHMIN-NEXT:    lbu a0, 13(sp)
-; RV32IFZFHMIN-NEXT:    fmv.x.h a1, fa0
-; RV32IFZFHMIN-NEXT:    slli a1, a1, 17
-; RV32IFZFHMIN-NEXT:    andi a2, a0, 128
-; RV32IFZFHMIN-NEXT:    srli a0, a1, 17
-; RV32IFZFHMIN-NEXT:    beqz a2, .LBB5_2
-; RV32IFZFHMIN-NEXT:  # %bb.1:
-; RV32IFZFHMIN-NEXT:    lui a1, 1048568
+; RV32IFZFHMIN-NEXT:    srli a1, a1, 31
+; RV32IFZFHMIN-NEXT:    slli a1, a1, 15
+; RV32IFZFHMIN-NEXT:    fmv.x.h a0, fa0
+; RV32IFZFHMIN-NEXT:    slli a0, a0, 17
+; RV32IFZFHMIN-NEXT:    srli a0, a0, 17
 ; RV32IFZFHMIN-NEXT:    or a0, a0, a1
-; RV32IFZFHMIN-NEXT:  .LBB5_2:
-; RV32IFZFHMIN-NEXT:    fmv.h.x fa5, a0
-; RV32IFZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; RV32IFZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; RV32IFZFHMIN-NEXT:    addi sp, sp, 16
+; RV32IFZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; RV32IFZFHMIN-NEXT:    ret
 ;
 ; RV32IFDZFHMIN-LABEL: fold_demote_h_d:
 ; RV32IFDZFHMIN:       # %bb.0:
 ; RV32IFDZFHMIN-NEXT:    addi sp, sp, -16
 ; RV32IFDZFHMIN-NEXT:    fsd fa1, 8(sp)
-; RV32IFDZFHMIN-NEXT:    lbu a0, 15(sp)
+; RV32IFDZFHMIN-NEXT:    lw a0, 12(sp)
+; RV32IFDZFHMIN-NEXT:    srli a0, a0, 31
+; RV32IFDZFHMIN-NEXT:    slli a0, a0, 15
 ; RV32IFDZFHMIN-NEXT:    fmv.x.h a1, fa0
 ; RV32IFDZFHMIN-NEXT:    slli a1, a1, 17
-; RV32IFDZFHMIN-NEXT:    andi a2, a0, 128
-; RV32IFDZFHMIN-NEXT:    srli a0, a1, 17
-; RV32IFDZFHMIN-NEXT:    beqz a2, .LBB5_2
-; RV32IFDZFHMIN-NEXT:  # %bb.1:
-; RV32IFDZFHMIN-NEXT:    lui a1, 1048568
-; RV32IFDZFHMIN-NEXT:    or a0, a0, a1
-; RV32IFDZFHMIN-NEXT:  .LBB5_2:
-; RV32IFDZFHMIN-NEXT:    fmv.h.x fa5, a0
-; RV32IFDZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; RV32IFDZFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; RV32IFDZFHMIN-NEXT:    srli a1, a1, 17
+; RV32IFDZFHMIN-NEXT:    or a0, a1, a0
+; RV32IFDZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; RV32IFDZFHMIN-NEXT:    addi sp, sp, 16
 ; RV32IFDZFHMIN-NEXT:    ret
 ;
 ; RV64IFDZFHMIN-LABEL: fold_demote_h_d:
 ; RV64IFDZFHMIN:       # %bb.0:
-; RV64IFDZFHMIN-NEXT:    fmv.x.h a0, fa0
-; RV64IFDZFHMIN-NEXT:    slli a0, a0, 49
-; RV64IFDZFHMIN-NEXT:    fmv.x.d a1, fa1
-; RV64IFDZFHMIN-NEXT:    srli a0, a0, 49
-; RV64IFDZFHMIN-NEXT:    bgez a1, .LBB5_2
-; RV64IFDZFHMIN-NEXT:  # %bb.1:
-; RV64IFDZFHMIN-NEXT:    lui a1, 1048568
-; RV64IFDZFHMIN-NEXT:    or a0, a0, a1
-; RV64IFDZFHMIN-NEXT:  .LBB5_2:
-; RV64IFDZFHMIN-NEXT:    fmv.h.x fa5, a0
-; RV64IFDZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; RV64IFDZFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; RV64IFDZFHMIN-NEXT:    fmv.x.d a0, fa1
+; RV64IFDZFHMIN-NEXT:    srli a0, a0, 63
+; RV64IFDZFHMIN-NEXT:    slli a0, a0, 15
+; RV64IFDZFHMIN-NEXT:    fmv.x.h a1, fa0
+; RV64IFDZFHMIN-NEXT:    slli a1, a1, 49
+; RV64IFDZFHMIN-NEXT:    srli a1, a1, 49
+; RV64IFDZFHMIN-NEXT:    or a0, a1, a0
+; RV64IFDZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; RV64IFDZFHMIN-NEXT:    ret
   %c = fptrunc double %b to half
   %t = call half @llvm.copysign.f16(half %a, half %c)
diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll
index 4c64e6c7a20906..f8522b09970bf9 100644
--- a/llvm/test/CodeGen/RISCV/half-arith.ll
+++ b/llvm/test/CodeGen/RISCV/half-arith.ll
@@ -442,78 +442,44 @@ define half @fsgnj_h(half %a, half %b) nounwind {
 ;
 ; RV32IZFHMIN-LABEL: fsgnj_h:
 ; RV32IZFHMIN:       # %bb.0:
-; RV32IZFHMIN-NEXT:    addi sp, sp, -16
-; RV32IZFHMIN-NEXT:    fsh fa1, 12(sp)
-; RV32IZFHMIN-NEXT:    lbu a0, 13(sp)
+; RV32IZFHMIN-NEXT:    fmv.x.h a0, fa1
+; RV32IZFHMIN-NEXT:    lui a1, 1048568
+; RV32IZFHMIN-NEXT:    and a0, a0, a1
 ; RV32IZFHMIN-NEXT:    fmv.x.h a1, fa0
 ; RV32IZFHMIN-NEXT:    slli a1, a1, 17
-; RV32IZFHMIN-NEXT:    andi a2, a0, 128
-; RV32IZFHMIN-NEXT:    srli a0, a1, 17
-; RV32IZFHMIN-NEXT:    beqz a2, .LBB5_2
-; RV32IZFHMIN-NEXT:  # %bb.1:
-; RV32IZFHMIN-NEXT:    lui a1, 1048568
-; RV32IZFHMIN-NEXT:    or a0, a0, a1
-; RV32IZFHMIN-NEXT:  .LBB5_2:
-; RV32IZFHMIN-NEXT:    fmv.h.x fa5, a0
-; RV32IZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; RV32IZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; RV32IZFHMIN-NEXT:    addi sp, sp, 16
+; RV32IZFHMIN-NEXT:    srli a1, a1, 17
+; RV32IZFHMIN-NEXT:    or a0, a1, a0
+; RV32IZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; RV32IZFHMIN-NEXT:    ret
 ;
 ; RV64IZFHMIN-LABEL: fsgnj_h:
 ; RV64IZFHMIN:       # %bb.0:
-; RV64IZFHMIN-NEXT:    addi sp, sp, -16
-; RV64IZFHMIN-NEXT:    fsh fa1, 8(sp)
-; RV64IZFHMIN-NEXT:    lbu a0, 9(sp)
+; RV64IZFHMIN-NEXT:    fmv.x.h a0, fa1
+; RV64IZFHMIN-NEXT:    lui a1, 1048568
+; RV64IZFHMIN-NEXT:    and a0, a0, a1
 ; RV64IZFHMIN-NEXT:    fmv.x.h a1, fa0
 ; RV64IZFHMIN-NEXT:    slli a1, a1, 49
-; RV64IZFHMIN-NEXT:    andi a2, a0, 128
-; RV64IZFHMIN-NEXT:    srli a0, a1, 49
-; RV64IZFHMIN-NEXT:    beqz a2, .LBB5_2
-; RV64IZFHMIN-NEXT:  # %bb.1:
-; RV64IZFHMIN-NEXT:    lui a1, 1048568
-; RV64IZFHMIN-NEXT:    or a0, a0, a1
-; RV64IZFHMIN-NEXT:  .LBB5_2:
-; RV64IZFHMIN-NEXT:    fmv.h.x fa5, a0
-; RV64IZFHMIN-NEXT:    fcvt.s.h fa5, fa5
-; RV64IZFHMIN-NEXT:    fcvt.h.s fa0, fa5
-; RV64IZFHMIN-NEXT:    addi sp, sp, 16
+; RV64IZFHMIN-NEXT:    srli a1, a1, 49
+; RV64IZFHMIN-NEXT:    or a0, a1, a0
+; RV64IZFHMIN-NEXT:    fmv.h.x fa0, a0
 ; RV64IZFHMIN-NEXT:    ret
 ;
 ; RV32IZHINXMIN-LABEL: fsgnj_h:
 ; RV32IZHINXMIN:       # %bb.0:
-; RV32IZHINXMIN-NEXT:    addi sp, sp, -16
-; RV32IZHINXMIN-NEXT:    sh a1, 12(sp)
-; RV32IZHINXMIN-NEXT:    lbu a1, 13(sp)
+; RV32IZHINXMIN-NEXT:    lui a2, 1048568
+; RV32IZHINXMIN-NEXT:    and a1, a1, a2
 ; RV32IZHINXMIN-NEXT:    slli a0, a0, 17
-; RV32IZHINXMIN-NEXT:    andi a1, a1, 128
 ; RV32IZHINXMIN-NEXT:    srli a0, a0, 17
-; RV32IZHINXMIN-NEXT:    beqz a1, .LBB5_2
-; RV32IZHINXMIN-NEXT:  # %bb.1:
-; RV32IZHINXMIN-NEXT:    lui a1, 1048568
 ; RV32IZHINXMIN-NEXT:    or a0, a0, a1
-; RV32IZHINXMIN-NEXT:  .LBB5_2:
-; RV32IZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; RV32IZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; RV32IZHINXMIN-NEXT:    addi sp, sp, 16
 ; RV32IZHINXMIN-NEXT:    ret
 ;
 ; RV64IZHINXMIN-LABEL: fsgnj_h:
 ; RV64IZHINXMIN:       # %bb.0:
-; RV64IZHINXMIN-NEXT:    addi sp, sp, -16
-; RV64IZHINXMIN-NEXT:    sh a1, 8(sp)
-; RV64IZHINXMIN-NEXT:    lbu a1, 9(sp)
+; RV64IZHINXMIN-NEXT:    lui a2, 1048568
+; RV64IZHINXMIN-NEXT:    and a1, a1, a2
 ; RV64IZHINXMIN-NEXT:    slli a0, a0, 49
-; RV64IZHINXMIN-NEXT:    andi a1, a1, 128
 ; RV64IZHINXMIN-NEXT:    srli a0, a0, 49
-; RV64IZHINXMIN-NEXT:    beqz a1, .LBB5_2
-; RV64IZHINXMIN-NEXT:  # %bb.1:
-; RV64IZHINXMIN-NEXT:    lui a1, 1048568
 ; RV64IZHINXMIN-NEXT:    or a0, a0, a1
-; RV64IZHINXMIN-NEXT:  .LBB5_2:
-; RV64IZHINXMIN-NEXT:    fcvt.s.h a0, a0
-; RV64IZHINXMIN-NEXT:    fcvt.h.s a0, a0
-; RV64IZHINXMIN-NEXT:    addi sp, sp, 16
 ; RV64IZHINXMIN-NEXT:    ret
   %1 = call half @llvm.copysign.f16(half %a, half %b)
   ret half %1
@@ -725,108 +691,64 @@ define half @fsgnjn_h(half %a, half %b) nounwind {
 ;
 ; RV32IZFHMIN-LABEL: fsgnjn_h:
 ; RV32IZFHMIN:       # %bb.0:
-; RV32IZFHMIN-NEXT:    addi sp, sp, -16
 ; RV32IZFHMIN-NEXT:    fcvt.s.h fa5, fa1
 ; RV32IZFHMIN-NEXT:    fcvt.s.h fa4, fa0
 ; RV32IZ...
[truncated]

Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Thank you!

@topperc topperc merged commit 9a1eded into llvm:main Sep 3, 2024
10 checks passed
@topperc topperc deleted the pr/f16-fcopysign branch September 3, 2024 05:04
@llvm-ci
Copy link
Collaborator

llvm-ci commented Sep 3, 2024

LLVM Buildbot has detected a new failure on builder sanitizer-x86_64-linux running on sanitizer-buildbot2 while building llvm at step 2 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/66/builds/3400

Here is the relevant piece of the build log for the reference
Step 2 (annotate) failure: 'python ../sanitizer_buildbot/sanitizers/zorg/buildbot/builders/sanitizers/buildbot_selector.py' (failure)
...
[378/384] Generating Fuzzer-x86_64-Test
[379/384] Generating MSAN_INST_GTEST.gtest-all.cc.x86_64.o
[380/384] Generating MSAN_INST_TEST_OBJECTS.msan_test.cpp.x86_64-with-call.o
[381/384] Generating Msan-x86_64-with-call-Test
[382/384] Generating MSAN_INST_TEST_OBJECTS.msan_test.cpp.x86_64.o
[383/384] Generating Msan-x86_64-Test
[383/384] Running compiler_rt regression tests
llvm-lit: /home/b/sanitizer-x86_64-linux/build/llvm-project/llvm/utils/lit/lit/main.py:72: note: The test suite configuration requested an individual test timeout of 0 seconds but a timeout of 900 seconds was requested on the command line. Forcing timeout to be 900 seconds.
-- Testing: 10229 tests, 88 workers --
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60..
FAIL: SanitizerCommon-lsan-i386-Linux :: Linux/soft_rss_limit_mb_test.cpp (7108 of 10229)
******************** TEST 'SanitizerCommon-lsan-i386-Linux :: Linux/soft_rss_limit_mb_test.cpp' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /home/b/sanitizer-x86_64-linux/build/build_default/./bin/clang  --driver-mode=g++ -gline-tables-only -fsanitize=leak  -m32 -funwind-tables  -I/home/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test -ldl -O2 /home/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -o /home/b/sanitizer-x86_64-linux/build/build_default/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/lsan-i386-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp
+ /home/b/sanitizer-x86_64-linux/build/build_default/./bin/clang --driver-mode=g++ -gline-tables-only -fsanitize=leak -m32 -funwind-tables -I/home/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test -ldl -O2 /home/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -o /home/b/sanitizer-x86_64-linux/build/build_default/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/lsan-i386-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp
RUN: at line 5: env LSAN_OPTIONS=soft_rss_limit_mb=220:quarantine_size=1:allocator_may_return_null=1      /home/b/sanitizer-x86_64-linux/build/build_default/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/lsan-i386-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp 2>&1 | FileCheck /home/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -check-prefix=CHECK_MAY_RETURN_1
+ env LSAN_OPTIONS=soft_rss_limit_mb=220:quarantine_size=1:allocator_may_return_null=1 /home/b/sanitizer-x86_64-linux/build/build_default/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/lsan-i386-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp
+ FileCheck /home/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -check-prefix=CHECK_MAY_RETURN_1
/home/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp:68:24: error: CHECK_MAY_RETURN_1: expected string not found in input
// CHECK_MAY_RETURN_1: allocating 512 times
                       ^
<stdin>:52:44: note: scanning from here
Some of the malloc calls returned non-null: 256
                                           ^
<stdin>:52:45: note: possible intended match here
Some of the malloc calls returned non-null: 256
                                            ^

Input file: <stdin>
Check file: /home/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp

-dump-input=help explains the following input dump.

Input was:
<<<<<<
            .
            .
            .
           47:  [256] 
           48:  [320] 
           49:  [384] 
           50:  [448] 
           51: Some of the malloc calls returned null: 256 
           52: Some of the malloc calls returned non-null: 256 
check:68'0                                                X~~~~ error: no match found
check:68'1                                                 ?    possible intended match
Step 14 (test compiler-rt default) failure: test compiler-rt default (failure)
...
[378/384] Generating Fuzzer-x86_64-Test
[379/384] Generating MSAN_INST_GTEST.gtest-all.cc.x86_64.o
[380/384] Generating MSAN_INST_TEST_OBJECTS.msan_test.cpp.x86_64-with-call.o
[381/384] Generating Msan-x86_64-with-call-Test
[382/384] Generating MSAN_INST_TEST_OBJECTS.msan_test.cpp.x86_64.o
[383/384] Generating Msan-x86_64-Test
[383/384] Running compiler_rt regression tests
llvm-lit: /home/b/sanitizer-x86_64-linux/build/llvm-project/llvm/utils/lit/lit/main.py:72: note: The test suite configuration requested an individual test timeout of 0 seconds but a timeout of 900 seconds was requested on the command line. Forcing timeout to be 900 seconds.
-- Testing: 10229 tests, 88 workers --
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60..
FAIL: SanitizerCommon-lsan-i386-Linux :: Linux/soft_rss_limit_mb_test.cpp (7108 of 10229)
******************** TEST 'SanitizerCommon-lsan-i386-Linux :: Linux/soft_rss_limit_mb_test.cpp' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /home/b/sanitizer-x86_64-linux/build/build_default/./bin/clang  --driver-mode=g++ -gline-tables-only -fsanitize=leak  -m32 -funwind-tables  -I/home/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test -ldl -O2 /home/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -o /home/b/sanitizer-x86_64-linux/build/build_default/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/lsan-i386-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp
+ /home/b/sanitizer-x86_64-linux/build/build_default/./bin/clang --driver-mode=g++ -gline-tables-only -fsanitize=leak -m32 -funwind-tables -I/home/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test -ldl -O2 /home/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -o /home/b/sanitizer-x86_64-linux/build/build_default/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/lsan-i386-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp
RUN: at line 5: env LSAN_OPTIONS=soft_rss_limit_mb=220:quarantine_size=1:allocator_may_return_null=1      /home/b/sanitizer-x86_64-linux/build/build_default/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/lsan-i386-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp 2>&1 | FileCheck /home/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -check-prefix=CHECK_MAY_RETURN_1
+ env LSAN_OPTIONS=soft_rss_limit_mb=220:quarantine_size=1:allocator_may_return_null=1 /home/b/sanitizer-x86_64-linux/build/build_default/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/lsan-i386-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp
+ FileCheck /home/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -check-prefix=CHECK_MAY_RETURN_1
/home/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp:68:24: error: CHECK_MAY_RETURN_1: expected string not found in input
// CHECK_MAY_RETURN_1: allocating 512 times
                       ^
<stdin>:52:44: note: scanning from here
Some of the malloc calls returned non-null: 256
                                           ^
<stdin>:52:45: note: possible intended match here
Some of the malloc calls returned non-null: 256
                                            ^

Input file: <stdin>
Check file: /home/b/sanitizer-x86_64-linux/build/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp

-dump-input=help explains the following input dump.

Input was:
<<<<<<
            .
            .
            .
           47:  [256] 
           48:  [320] 
           49:  [384] 
           50:  [448] 
           51: Some of the malloc calls returned null: 256 
           52: Some of the malloc calls returned non-null: 256 
check:68'0                                                X~~~~ error: no match found
check:68'1                                                 ?    possible intended match

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants