[SystemZ] Add DAGCombine for FCOPYSIGN to remove rounding. (#136131)

JonPsson1 · web-flow · commit 94a14f9f0d88 · 2025-04-24T11:05:51.000+02:00
Add a DAGCombine for FCOPYSIGN that removes the rounding of the sign
operand, which is never needed because the sign bit is already in the
correct place. This particularly helps the case of rounding to f16, which
otherwise needs a libcall.

Also remove the roundings for other FP VTs and simplify the CPSDR
patterns correspondingly.

fp-copysign-03.ll test updated, now also covering the other FP VT
combinations.
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -792,6 +792,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
                        ISD::SINT_TO_FP,
                        ISD::UINT_TO_FP,
                        ISD::STRICT_FP_EXTEND,
+                       ISD::FCOPYSIGN,
                        ISD::BSWAP,
                        ISD::SETCC,
                        ISD::SRL,
@@ -8548,6 +8549,22 @@ SDValue SystemZTargetLowering::combineINT_TO_FP(
   return SDValue();
 }
 
+SDValue SystemZTargetLowering::combineFCOPYSIGN(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  EVT VT = N->getValueType(0);
+  SDValue ValOp = N->getOperand(0);
+  SDValue SignOp = N->getOperand(1);
+
+  // The sign bit survives FP_ROUND unchanged, so read it from the wide value.
+  if (SignOp.getOpcode() == ISD::FP_ROUND) {
+    SDValue WideOp = SignOp.getOperand(0);
+    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, ValOp, WideOp);
+  }
+
+  return SDValue();
+}
+
 SDValue SystemZTargetLowering::combineBSWAP(
     SDNode *N, DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -9137,6 +9154,7 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::FP_EXTEND:          return combineFP_EXTEND(N, DCI);
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:         return combineINT_TO_FP(N, DCI);
+  case ISD::FCOPYSIGN:          return combineFCOPYSIGN(N, DCI);
   case ISD::BSWAP:              return combineBSWAP(N, DCI);
   case ISD::SETCC:              return combineSETCC(N, DCI);
   case SystemZISD::BR_CCMASK:   return combineBR_CCMASK(N, DCI);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -777,6 +777,7 @@ class SystemZTargetLowering : public TargetLowering {
   SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineFP_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineINT_TO_FP(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue combineFCOPYSIGN(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineSETCC(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -109,10 +109,10 @@ let isCodeGenOnly = 1 in {
 
 // The sign of an FP128 is in the high register.
 let Predicates = [FeatureNoVectorEnhancements1] in
-  def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 FP128:$src2)))),
+  def : Pat<(fcopysign FP32:$src1, (f128 FP128:$src2)),
             (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
 let Predicates = [FeatureVectorEnhancements1] in
-  def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 VR128:$src2)))),
+  def : Pat<(fcopysign FP32:$src1, (f128 VR128:$src2)),
             (CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>;
 
 // fcopysign with an FP64 result.
@@ -124,10 +124,10 @@ def CPSDRdd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP64>;
 
 // The sign of an FP128 is in the high register.
 let Predicates = [FeatureNoVectorEnhancements1] in
-  def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 FP128:$src2)))),
+  def : Pat<(fcopysign FP64:$src1, (f128 FP128:$src2)),
             (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
 let Predicates = [FeatureVectorEnhancements1] in
-  def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 VR128:$src2)))),
+  def : Pat<(fcopysign FP64:$src1, (f128 VR128:$src2)),
             (CPSDRdd FP64:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>;
 
 // fcopysign with an FP128 result.  Use "upper" as the high half and leave
diff --git a/llvm/test/CodeGen/SystemZ/fp-copysign-01.ll b/llvm/test/CodeGen/SystemZ/fp-copysign-01.ll
@@ -1,4 +1,4 @@
-; Test copysign operations.
+; Test copysign libcalls.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
diff --git a/llvm/test/CodeGen/SystemZ/fp-copysign-02.ll b/llvm/test/CodeGen/SystemZ/fp-copysign-02.ll
@@ -1,4 +1,4 @@
-; Test f128 copysign operations on z14.
+; Test f128 copysign libcalls on z14.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
 
diff --git a/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll b/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,Z16
 ;
-; Test copysign intrinsics with half.
+; Test copysign intrinsics.
 
 declare half @llvm.copysign.f16(half, half)
 declare float @llvm.copysign.f32(float, float)
@@ -43,53 +43,25 @@ define half @f2(half %a, double %b) {
 }
 
 ; Test copysign with an f16 result and f128 sign argument.
-; TODO: Let the DAGCombiner remove the fp_round.
 define half @f3(half %a, fp128 %b) {
 ; Z10-LABEL: f3:
 ; Z10:       # %bb.0:
-; Z10-NEXT:    stmg %r14, %r15, 112(%r15)
-; Z10-NEXT:    .cfi_offset %r14, -48
-; Z10-NEXT:    .cfi_offset %r15, -40
-; Z10-NEXT:    aghi %r15, -184
-; Z10-NEXT:    .cfi_def_cfa_offset 344
-; Z10-NEXT:    std %f8, 176(%r15) # 8-byte Spill
-; Z10-NEXT:    .cfi_offset %f8, -168
 ; Z10-NEXT:    ld %f1, 0(%r2)
 ; Z10-NEXT:    ld %f3, 8(%r2)
-; Z10-NEXT:    ler %f8, %f0
-; Z10-NEXT:    la %r2, 160(%r15)
-; Z10-NEXT:    std %f1, 160(%r15)
-; Z10-NEXT:    std %f3, 168(%r15)
-; Z10-NEXT:    brasl %r14, __trunctfhf2@PLT
-; Z10-NEXT:    cpsdr %f0, %f0, %f8
-; Z10-NEXT:    ld %f8, 176(%r15) # 8-byte Reload
-; Z10-NEXT:    lmg %r14, %r15, 296(%r15)
+; Z10-NEXT:    cpsdr %f0, %f1, %f0
 ; Z10-NEXT:    br %r14
 ;
 ; Z16-LABEL: f3:
 ; Z16:       # %bb.0:
-; Z16-NEXT:    stmg %r14, %r15, 112(%r15)
-; Z16-NEXT:    .cfi_offset %r14, -48
-; Z16-NEXT:    .cfi_offset %r15, -40
-; Z16-NEXT:    aghi %r15, -184
-; Z16-NEXT:    .cfi_def_cfa_offset 344
-; Z16-NEXT:    std %f8, 176(%r15) # 8-byte Spill
-; Z16-NEXT:    .cfi_offset %f8, -168
-; Z16-NEXT:    ldr %f8, %f0
-; Z16-NEXT:    vl %v0, 0(%r2), 3
-; Z16-NEXT:    la %r2, 160(%r15)
-; Z16-NEXT:    vst %v0, 160(%r15), 3
-; Z16-NEXT:    brasl %r14, __trunctfhf2@PLT
-; Z16-NEXT:    cpsdr %f0, %f0, %f8
-; Z16-NEXT:    ld %f8, 176(%r15) # 8-byte Reload
-; Z16-NEXT:    lmg %r14, %r15, 296(%r15)
+; Z16-NEXT:    vl %v1, 0(%r2), 3
+; Z16-NEXT:    cpsdr %f0, %f1, %f0
 ; Z16-NEXT:    br %r14
   %bh = fptrunc fp128 %b to half
   %res = call half @llvm.copysign.f16(half %a, half %bh)
   ret half %res
 }
 
-; Test copysign with an f32 result and half sign argument.
+; Test copysign with an f32 result and f16 sign argument.
 define float @f4(float %a, half %b) {
 ; CHECK-LABEL: f4:
 ; CHECK:       # %bb.0:
@@ -100,20 +72,100 @@ define float @f4(float %a, half %b) {
   ret float %res
 }
 
-; Test copysign with an f64 result and half sign argument.
-define double @f5(double %a, half %b) {
+; Test copysign with an f32 result and f32 sign argument.
+define float @f5(float %a, float %b) {
 ; CHECK-LABEL: f5:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cpsdr %f0, %f2, %f0
+; CHECK-NEXT:    br %r14
+  %res = call float @llvm.copysign.f32(float %a, float %b)
+  ret float %res
+}
+
+; Test copysign with an f32 result and f64 sign argument.
+define float @f6(float %a, double %b) {
+; CHECK-LABEL: f6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cpsdr %f0, %f2, %f0
+; CHECK-NEXT:    br %r14
+  %bf = fptrunc double %b to float
+  %res = call float @llvm.copysign.f32(float %a, float %bf)
+  ret float %res
+}
+
+; Test copysign with an f32 result and f128 sign argument.
+define float @f7(float %a, fp128 %b) {
+; Z10-LABEL: f7:
+; Z10:       # %bb.0:
+; Z10-NEXT:    ld %f1, 0(%r2)
+; Z10-NEXT:    ld %f3, 8(%r2)
+; Z10-NEXT:    cpsdr %f0, %f1, %f0
+; Z10-NEXT:    br %r14
+;
+; Z16-LABEL: f7:
+; Z16:       # %bb.0:
+; Z16-NEXT:    vl %v1, 0(%r2), 3
+; Z16-NEXT:    cpsdr %f0, %f1, %f0
+; Z16-NEXT:    br %r14
+  %bf = fptrunc fp128 %b to float
+  %res = call float @llvm.copysign.f32(float %a, float %bf)
+  ret float %res
+}
+
+; Test copysign with an f64 result and f16 sign argument.
+define double @f8(double %a, half %b) {
+; CHECK-LABEL: f8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cpsdr %f0, %f2, %f0
 ; CHECK-NEXT:    br %r14
   %bd = fpext half %b to double
   %res = call double @llvm.copysign.f64(double %a, double %bd)
   ret double %res
 }
 
-; Test copysign with an f128 result and half sign argument.
-define fp128 @f6(fp128 %a, half %b) {
-; Z10-LABEL: f6:
+; Test copysign with an f64 result and f32 sign argument.
+define double @f9(double %a, float %b) {
+; CHECK-LABEL: f9:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cpsdr %f0, %f2, %f0
+; CHECK-NEXT:    br %r14
+  %bd = fpext float %b to double
+  %res = call double @llvm.copysign.f64(double %a, double %bd)
+  ret double %res
+}
+
+; Test copysign with an f64 result and f64 sign argument.
+define double @f10(double %a, double %b) {
+; CHECK-LABEL: f10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cpsdr %f0, %f2, %f0
+; CHECK-NEXT:    br %r14
+  %res = call double @llvm.copysign.f64(double %a, double %b)
+  ret double %res
+}
+
+; Test copysign with an f64 result and f128 sign argument.
+define double @f11(double %a, fp128 %b) {
+; Z10-LABEL: f11:
+; Z10:       # %bb.0:
+; Z10-NEXT:    ld %f1, 0(%r2)
+; Z10-NEXT:    ld %f3, 8(%r2)
+; Z10-NEXT:    cpsdr %f0, %f1, %f0
+; Z10-NEXT:    br %r14
+;
+; Z16-LABEL: f11:
+; Z16:       # %bb.0:
+; Z16-NEXT:    vl %v1, 0(%r2), 3
+; Z16-NEXT:    cpsdr %f0, %f1, %f0
+; Z16-NEXT:    br %r14
+  %bd = fptrunc fp128 %b to double
+  %res = call double @llvm.copysign.f64(double %a, double %bd)
+  ret double %res
+}
+
+; Test copysign with an f128 result and f16 sign argument.
+define fp128 @f12(fp128 %a, half %b) {
+; Z10-LABEL: f12:
 ; Z10:       # %bb.0:
 ; Z10-NEXT:    ld %f1, 0(%r3)
 ; Z10-NEXT:    ld %f3, 8(%r3)
@@ -122,24 +174,117 @@ define fp128 @f6(fp128 %a, half %b) {
 ; Z10-NEXT:    std %f3, 8(%r2)
 ; Z10-NEXT:    br %r14
 ;
-; Z16-LABEL: f6:
+; Z16-LABEL: f12:
 ; Z16:       # %bb.0:
 ; Z16-NEXT:    aghi %r15, -168
 ; Z16-NEXT:    .cfi_def_cfa_offset 328
 ; Z16-NEXT:    vl %v1, 0(%r3), 3
 ; Z16-NEXT:    vsteh %v0, 164(%r15), 0
 ; Z16-NEXT:    tm 164(%r15), 128
-; Z16-NEXT:    je .LBB6_2
+; Z16-NEXT:    je .LBB12_2
 ; Z16-NEXT:  # %bb.1:
 ; Z16-NEXT:    wflnxb %v0, %v1
-; Z16-NEXT:    j .LBB6_3
-; Z16-NEXT:  .LBB6_2:
+; Z16-NEXT:    j .LBB12_3
+; Z16-NEXT:  .LBB12_2:
 ; Z16-NEXT:    wflpxb %v0, %v1
-; Z16-NEXT:  .LBB6_3:
+; Z16-NEXT:  .LBB12_3:
 ; Z16-NEXT:    vst %v0, 0(%r2), 3
 ; Z16-NEXT:    aghi %r15, 168
 ; Z16-NEXT:    br %r14
-  %bd = fpext half %b to fp128
-  %res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %bd)
+  %b128 = fpext half %b to fp128
+  %res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b128)
+  ret fp128 %res
+}
+
+; Test copysign with an f128 result and f32 sign argument.
+define fp128 @f13(fp128 %a, float %b) {
+; Z10-LABEL: f13:
+; Z10:       # %bb.0:
+; Z10-NEXT:    ld %f1, 0(%r3)
+; Z10-NEXT:    ld %f3, 8(%r3)
+; Z10-NEXT:    cpsdr %f1, %f0, %f1
+; Z10-NEXT:    std %f1, 0(%r2)
+; Z10-NEXT:    std %f3, 8(%r2)
+; Z10-NEXT:    br %r14
+;
+; Z16-LABEL: f13:
+; Z16:       # %bb.0:
+; Z16-NEXT:    vl %v1, 0(%r3), 3
+; Z16-NEXT:    vlgvf %r0, %v0, 0
+; Z16-NEXT:    tmlh %r0, 32768
+; Z16-NEXT:    je .LBB13_2
+; Z16-NEXT:  # %bb.1:
+; Z16-NEXT:    wflnxb %v0, %v1
+; Z16-NEXT:    vst %v0, 0(%r2), 3
+; Z16-NEXT:    br %r14
+; Z16-NEXT:  .LBB13_2:
+; Z16-NEXT:    wflpxb %v0, %v1
+; Z16-NEXT:    vst %v0, 0(%r2), 3
+; Z16-NEXT:    br %r14
+  %b128 = fpext float %b to fp128
+  %res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b128)
+  ret fp128 %res
+}
+
+; Test copysign with an f128 result and f64 sign argument.
+define fp128 @f14(fp128 %a, double %b) {
+; Z10-LABEL: f14:
+; Z10:       # %bb.0:
+; Z10-NEXT:    ld %f1, 0(%r3)
+; Z10-NEXT:    ld %f3, 8(%r3)
+; Z10-NEXT:    cpsdr %f1, %f0, %f1
+; Z10-NEXT:    std %f1, 0(%r2)
+; Z10-NEXT:    std %f3, 8(%r2)
+; Z10-NEXT:    br %r14
+;
+; Z16-LABEL: f14:
+; Z16:       # %bb.0:
+; Z16-NEXT:    vl %v1, 0(%r3), 3
+; Z16-NEXT:    lgdr %r0, %f0
+; Z16-NEXT:    tmhh %r0, 32768
+; Z16-NEXT:    je .LBB14_2
+; Z16-NEXT:  # %bb.1:
+; Z16-NEXT:    wflnxb %v0, %v1
+; Z16-NEXT:    vst %v0, 0(%r2), 3
+; Z16-NEXT:    br %r14
+; Z16-NEXT:  .LBB14_2:
+; Z16-NEXT:    wflpxb %v0, %v1
+; Z16-NEXT:    vst %v0, 0(%r2), 3
+; Z16-NEXT:    br %r14
+  %b128 = fpext double %b to fp128
+  %res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b128)
+  ret fp128 %res
+}
+
+; Test copysign with an f128 result and f128 sign argument.
+define fp128 @f15(fp128 %a, fp128 %b) {
+; Z10-LABEL: f15:
+; Z10:       # %bb.0:
+; Z10-NEXT:    ld %f0, 0(%r3)
+; Z10-NEXT:    ld %f2, 8(%r3)
+; Z10-NEXT:    ld %f1, 0(%r4)
+; Z10-NEXT:    ld %f3, 8(%r4)
+; Z10-NEXT:    cpsdr %f0, %f1, %f0
+; Z10-NEXT:    std %f0, 0(%r2)
+; Z10-NEXT:    std %f2, 8(%r2)
+; Z10-NEXT:    br %r14
+;
+; Z16-LABEL: f15:
+; Z16:       # %bb.0:
+; Z16-NEXT:    larl %r1, .LCPI15_0
+; Z16-NEXT:    vl %v1, 0(%r4), 3
+; Z16-NEXT:    vl %v2, 0(%r1), 3
+; Z16-NEXT:    vl %v0, 0(%r3), 3
+; Z16-NEXT:    vtm %v1, %v2
+; Z16-NEXT:    je .LBB15_2
+; Z16-NEXT:  # %bb.1:
+; Z16-NEXT:    wflnxb %v0, %v0
+; Z16-NEXT:    vst %v0, 0(%r2), 3
+; Z16-NEXT:    br %r14
+; Z16-NEXT:  .LBB15_2:
+; Z16-NEXT:    wflpxb %v0, %v0
+; Z16-NEXT:    vst %v0, 0(%r2), 3
+; Z16-NEXT:    br %r14
+  %res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b)
   ret fp128 %res
 }

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-; Test copysign operations.`
	`1`	`+; Test copysign libcalls.`
`2`	`2`	`;`
`3`	`3`	`; RUN: llc < %s -mtriple=s390x-linux-gnu \| FileCheck %s`
`4`	`4`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-; Test f128 copysign operations on z14.`
	`1`	`+; Test f128 copysign libcalls on z14.`
`2`	`2`	`;`
`3`	`3`	`; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \| FileCheck %s`
`4`	`4`