Skip to content

[SystemZ] Add DAGCombine for FCOPYSIGN to remove rounding. #136131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
ISD::SINT_TO_FP,
ISD::UINT_TO_FP,
ISD::STRICT_FP_EXTEND,
ISD::FCOPYSIGN,
ISD::BSWAP,
ISD::SETCC,
ISD::SRL,
Expand Down Expand Up @@ -8548,6 +8549,22 @@ SDValue SystemZTargetLowering::combineINT_TO_FP(
return SDValue();
}

/// Target DAG combine for ISD::FCOPYSIGN nodes.
///
/// If the sign operand (operand 1) is produced by an ISD::FP_ROUND, build a
/// new FCOPYSIGN of the same result type that takes its sign directly from
/// the FP_ROUND's input, bypassing the rounding.
///
/// \param N   The FCOPYSIGN node being combined.
/// \param DCI Combiner state; only its SelectionDAG is used here.
/// \returns The replacement node, or an empty SDValue if the sign operand is
///          not an FP_ROUND and nothing was changed.
SDValue SystemZTargetLowering::combineFCOPYSIGN(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SDValue Sign = N->getOperand(1);

  // Only a sign operand that comes out of a rounding node is of interest.
  if (Sign.getOpcode() != ISD::FP_ROUND)
    return SDValue();

  // The rounding is not needed: take the sign from the unrounded value and
  // keep the magnitude operand and result type as they were.
  SDValue Magnitude = N->getOperand(0);
  SDValue Unrounded = Sign.getOperand(0);
  return DCI.DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), N->getValueType(0),
                         Magnitude, Unrounded);
}

SDValue SystemZTargetLowering::combineBSWAP(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
Expand Down Expand Up @@ -9137,6 +9154,7 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
case ISD::FCOPYSIGN: return combineFCOPYSIGN(N, DCI);
case ISD::BSWAP: return combineBSWAP(N, DCI);
case ISD::SETCC: return combineSETCC(N, DCI);
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,7 @@ class SystemZTargetLowering : public TargetLowering {
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineINT_TO_FP(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFCOPYSIGN(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSETCC(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/SystemZ/SystemZInstrFP.td
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,10 @@ let isCodeGenOnly = 1 in {

// The sign of an FP128 is in the high register.
let Predicates = [FeatureNoVectorEnhancements1] in
def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 FP128:$src2)))),
def : Pat<(fcopysign FP32:$src1, (f128 FP128:$src2)),
(CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
let Predicates = [FeatureVectorEnhancements1] in
def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 VR128:$src2)))),
def : Pat<(fcopysign FP32:$src1, (f128 VR128:$src2)),
(CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>;

// fcopysign with an FP64 result.
Expand All @@ -124,10 +124,10 @@ def CPSDRdd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP64>;

// The sign of an FP128 is in the high register.
let Predicates = [FeatureNoVectorEnhancements1] in
def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 FP128:$src2)))),
def : Pat<(fcopysign FP64:$src1, (f128 FP128:$src2)),
(CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
let Predicates = [FeatureVectorEnhancements1] in
def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 VR128:$src2)))),
def : Pat<(fcopysign FP64:$src1, (f128 VR128:$src2)),
(CPSDRdd FP64:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>;

// fcopysign with an FP128 result. Use "upper" as the high half and leave
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/SystemZ/fp-copysign-01.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; Test copysign operations.
; Test copysign libcalls.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/SystemZ/fp-copysign-02.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; Test f128 copysign operations on z14.
; Test f128 copysign libcalls on z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s

Expand Down
235 changes: 190 additions & 45 deletions llvm/test/CodeGen/SystemZ/fp-copysign-03.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 \
; RUN: | FileCheck %s --check-prefixes=CHECK,Z16
;
; Test copysign intrinsics with half.
; Test copysign intrinsics.

declare half @llvm.copysign.f16(half, half)
declare float @llvm.copysign.f32(float, float)
Expand Down Expand Up @@ -43,53 +43,25 @@ define half @f2(half %a, double %b) {
}

; Test copysign with an f16 result and f128 sign argument.
; TODO: Let the DAGCombiner remove the fp_round.
define half @f3(half %a, fp128 %b) {
; Z10-LABEL: f3:
; Z10: # %bb.0:
; Z10-NEXT: stmg %r14, %r15, 112(%r15)
; Z10-NEXT: .cfi_offset %r14, -48
; Z10-NEXT: .cfi_offset %r15, -40
; Z10-NEXT: aghi %r15, -184
; Z10-NEXT: .cfi_def_cfa_offset 344
; Z10-NEXT: std %f8, 176(%r15) # 8-byte Spill
; Z10-NEXT: .cfi_offset %f8, -168
; Z10-NEXT: ld %f1, 0(%r2)
; Z10-NEXT: ld %f3, 8(%r2)
; Z10-NEXT: ler %f8, %f0
; Z10-NEXT: la %r2, 160(%r15)
; Z10-NEXT: std %f1, 160(%r15)
; Z10-NEXT: std %f3, 168(%r15)
; Z10-NEXT: brasl %r14, __trunctfhf2@PLT
; Z10-NEXT: cpsdr %f0, %f0, %f8
; Z10-NEXT: ld %f8, 176(%r15) # 8-byte Reload
; Z10-NEXT: lmg %r14, %r15, 296(%r15)
; Z10-NEXT: cpsdr %f0, %f1, %f0
; Z10-NEXT: br %r14
;
; Z16-LABEL: f3:
; Z16: # %bb.0:
; Z16-NEXT: stmg %r14, %r15, 112(%r15)
; Z16-NEXT: .cfi_offset %r14, -48
; Z16-NEXT: .cfi_offset %r15, -40
; Z16-NEXT: aghi %r15, -184
; Z16-NEXT: .cfi_def_cfa_offset 344
; Z16-NEXT: std %f8, 176(%r15) # 8-byte Spill
; Z16-NEXT: .cfi_offset %f8, -168
; Z16-NEXT: ldr %f8, %f0
; Z16-NEXT: vl %v0, 0(%r2), 3
; Z16-NEXT: la %r2, 160(%r15)
; Z16-NEXT: vst %v0, 160(%r15), 3
; Z16-NEXT: brasl %r14, __trunctfhf2@PLT
; Z16-NEXT: cpsdr %f0, %f0, %f8
; Z16-NEXT: ld %f8, 176(%r15) # 8-byte Reload
; Z16-NEXT: lmg %r14, %r15, 296(%r15)
; Z16-NEXT: vl %v1, 0(%r2), 3
; Z16-NEXT: cpsdr %f0, %f1, %f0
; Z16-NEXT: br %r14
%bh = fptrunc fp128 %b to half
%res = call half @llvm.copysign.f16(half %a, half %bh)
ret half %res
}

; Test copysign with an f32 result and half sign argument.
; Test copysign with an f32 result and f16 sign argument.
define float @f4(float %a, half %b) {
; CHECK-LABEL: f4:
; CHECK: # %bb.0:
Expand All @@ -100,20 +72,100 @@ define float @f4(float %a, half %b) {
ret float %res
}

; Test copysign with an f64 result and half sign argument.
define double @f5(double %a, half %b) {
; Test copysign with an f32 result and f32 sign argument.
; Both operands already have the result type, so the IR contains no
; conversion; the expected output is a single cpsdr followed by the return.
define float @f5(float %a, float %b) {
; CHECK-LABEL: f5:
; CHECK: # %bb.0:
; CHECK-NEXT: cpsdr %f0, %f2, %f0
; CHECK-NEXT: br %r14
%res = call float @llvm.copysign.f32(float %a, float %b)
ret float %res
}

; Test copysign with an f32 result and f64 sign argument.
; The IR truncates the double sign to float before the copysign call; the
; expected output contains no rounding instruction, only a single cpsdr.
define float @f6(float %a, double %b) {
; CHECK-LABEL: f6:
; CHECK: # %bb.0:
; CHECK-NEXT: cpsdr %f0, %f2, %f0
; CHECK-NEXT: br %r14
%bf = fptrunc double %b to float
%res = call float @llvm.copysign.f32(float %a, float %bf)
ret float %res
}

; Test copysign with an f32 result and f128 sign argument.
; The IR truncates the fp128 sign to float before the copysign call. The
; expected output for both z10 and z16 loads the fp128 from memory and feeds
; it to cpsdr directly, with no truncation instruction or libcall in between.
define float @f7(float %a, fp128 %b) {
; Z10-LABEL: f7:
; Z10: # %bb.0:
; Z10-NEXT: ld %f1, 0(%r2)
; Z10-NEXT: ld %f3, 8(%r2)
; Z10-NEXT: cpsdr %f0, %f1, %f0
; Z10-NEXT: br %r14
;
; Z16-LABEL: f7:
; Z16: # %bb.0:
; Z16-NEXT: vl %v1, 0(%r2), 3
; Z16-NEXT: cpsdr %f0, %f1, %f0
; Z16-NEXT: br %r14
%bf = fptrunc fp128 %b to float
%res = call float @llvm.copysign.f32(float %a, float %bf)
ret float %res
}

; Test copysign with an f64 result and f16 sign argument.
; The IR extends the half sign to double before the copysign call; the
; expected output contains no extension, only a single cpsdr.
define double @f8(double %a, half %b) {
; CHECK-LABEL: f8:
; CHECK: # %bb.0:
; CHECK-NEXT: cpsdr %f0, %f2, %f0
; CHECK-NEXT: br %r14
%bd = fpext half %b to double
%res = call double @llvm.copysign.f64(double %a, double %bd)
ret double %res
}

; Test copysign with an f128 result and half sign argument.
define fp128 @f6(fp128 %a, half %b) {
; Z10-LABEL: f6:
; Test copysign with an f64 result and f32 sign argument.
; The IR extends the float sign to double before the copysign call; the
; expected output contains no extension, only a single cpsdr.
define double @f9(double %a, float %b) {
; CHECK-LABEL: f9:
; CHECK: # %bb.0:
; CHECK-NEXT: cpsdr %f0, %f2, %f0
; CHECK-NEXT: br %r14
%bd = fpext float %b to double
%res = call double @llvm.copysign.f64(double %a, double %bd)
ret double %res
}

; Test copysign with an f64 result and f64 sign argument.
; Both operands already have the result type, so the IR contains no
; conversion; the expected output is a single cpsdr followed by the return.
define double @f10(double %a, double %b) {
; CHECK-LABEL: f10:
; CHECK: # %bb.0:
; CHECK-NEXT: cpsdr %f0, %f2, %f0
; CHECK-NEXT: br %r14
%res = call double @llvm.copysign.f64(double %a, double %b)
ret double %res
}

; Test copysign with an f64 result and f128 sign argument.
; The IR truncates the fp128 sign to double before the copysign call. The
; expected output for both z10 and z16 loads the fp128 from memory and feeds
; it to cpsdr directly, with no truncation instruction or libcall in between.
define double @f11(double %a, fp128 %b) {
; Z10-LABEL: f11:
; Z10: # %bb.0:
; Z10-NEXT: ld %f1, 0(%r2)
; Z10-NEXT: ld %f3, 8(%r2)
; Z10-NEXT: cpsdr %f0, %f1, %f0
; Z10-NEXT: br %r14
;
; Z16-LABEL: f11:
; Z16: # %bb.0:
; Z16-NEXT: vl %v1, 0(%r2), 3
; Z16-NEXT: cpsdr %f0, %f1, %f0
; Z16-NEXT: br %r14
%bd = fptrunc fp128 %b to double
%res = call double @llvm.copysign.f64(double %a, double %bd)
ret double %res
}

; Test copysign with an f128 result and f16 sign argument.
define fp128 @f12(fp128 %a, half %b) {
; Z10-LABEL: f12:
; Z10: # %bb.0:
; Z10-NEXT: ld %f1, 0(%r3)
; Z10-NEXT: ld %f3, 8(%r3)
Expand All @@ -122,24 +174,117 @@ define fp128 @f6(fp128 %a, half %b) {
; Z10-NEXT: std %f3, 8(%r2)
; Z10-NEXT: br %r14
;
; Z16-LABEL: f6:
; Z16-LABEL: f12:
; Z16: # %bb.0:
; Z16-NEXT: aghi %r15, -168
; Z16-NEXT: .cfi_def_cfa_offset 328
; Z16-NEXT: vl %v1, 0(%r3), 3
; Z16-NEXT: vsteh %v0, 164(%r15), 0
; Z16-NEXT: tm 164(%r15), 128
; Z16-NEXT: je .LBB6_2
; Z16-NEXT: je .LBB12_2
; Z16-NEXT: # %bb.1:
; Z16-NEXT: wflnxb %v0, %v1
; Z16-NEXT: j .LBB6_3
; Z16-NEXT: .LBB6_2:
; Z16-NEXT: j .LBB12_3
; Z16-NEXT: .LBB12_2:
; Z16-NEXT: wflpxb %v0, %v1
; Z16-NEXT: .LBB6_3:
; Z16-NEXT: .LBB12_3:
; Z16-NEXT: vst %v0, 0(%r2), 3
; Z16-NEXT: aghi %r15, 168
; Z16-NEXT: br %r14
%bd = fpext half %b to fp128
%res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %bd)
%b128 = fpext half %b to fp128
%res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b128)
ret fp128 %res
}

; Test copysign with an f128 result and f32 sign argument.
; The IR extends the float sign to fp128 before the copysign call. For z10
; the expected output applies cpsdr to the first loaded doubleword of %a and
; stores both doublewords back. For z16 it moves the float into a GPR, tests
; it with tmlh, and picks wflnxb or wflpxb before storing the result.
define fp128 @f13(fp128 %a, float %b) {
; Z10-LABEL: f13:
; Z10: # %bb.0:
; Z10-NEXT: ld %f1, 0(%r3)
; Z10-NEXT: ld %f3, 8(%r3)
; Z10-NEXT: cpsdr %f1, %f0, %f1
; Z10-NEXT: std %f1, 0(%r2)
; Z10-NEXT: std %f3, 8(%r2)
; Z10-NEXT: br %r14
;
; Z16-LABEL: f13:
; Z16: # %bb.0:
; Z16-NEXT: vl %v1, 0(%r3), 3
; Z16-NEXT: vlgvf %r0, %v0, 0
; Z16-NEXT: tmlh %r0, 32768
; Z16-NEXT: je .LBB13_2
; Z16-NEXT: # %bb.1:
; Z16-NEXT: wflnxb %v0, %v1
; Z16-NEXT: vst %v0, 0(%r2), 3
; Z16-NEXT: br %r14
; Z16-NEXT: .LBB13_2:
; Z16-NEXT: wflpxb %v0, %v1
; Z16-NEXT: vst %v0, 0(%r2), 3
; Z16-NEXT: br %r14
%b128 = fpext float %b to fp128
%res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b128)
ret fp128 %res
}

; Test copysign with an f128 result and f64 sign argument.
; The IR extends the double sign to fp128 before the copysign call. For z10
; the expected output applies cpsdr to the first loaded doubleword of %a and
; stores both doublewords back. For z16 it moves the double into a GPR with
; lgdr, tests it with tmhh, and picks wflnxb or wflpxb before storing the
; result.
define fp128 @f14(fp128 %a, double %b) {
; Z10-LABEL: f14:
; Z10: # %bb.0:
; Z10-NEXT: ld %f1, 0(%r3)
; Z10-NEXT: ld %f3, 8(%r3)
; Z10-NEXT: cpsdr %f1, %f0, %f1
; Z10-NEXT: std %f1, 0(%r2)
; Z10-NEXT: std %f3, 8(%r2)
; Z10-NEXT: br %r14
;
; Z16-LABEL: f14:
; Z16: # %bb.0:
; Z16-NEXT: vl %v1, 0(%r3), 3
; Z16-NEXT: lgdr %r0, %f0
; Z16-NEXT: tmhh %r0, 32768
; Z16-NEXT: je .LBB14_2
; Z16-NEXT: # %bb.1:
; Z16-NEXT: wflnxb %v0, %v1
; Z16-NEXT: vst %v0, 0(%r2), 3
; Z16-NEXT: br %r14
; Z16-NEXT: .LBB14_2:
; Z16-NEXT: wflpxb %v0, %v1
; Z16-NEXT: vst %v0, 0(%r2), 3
; Z16-NEXT: br %r14
%b128 = fpext double %b to fp128
%res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b128)
ret fp128 %res
}

; Test copysign with an f128 result and f128 sign argument.
; Both operands already have the result type, so the IR contains no
; conversion. For z10 the expected output loads both values from memory and
; applies cpsdr to the first doubleword of each. For z16 it tests %b with vtm
; against a constant-pool value and picks wflnxb or wflpxb before storing the
; result.
define fp128 @f15(fp128 %a, fp128 %b) {
; Z10-LABEL: f15:
; Z10: # %bb.0:
; Z10-NEXT: ld %f0, 0(%r3)
; Z10-NEXT: ld %f2, 8(%r3)
; Z10-NEXT: ld %f1, 0(%r4)
; Z10-NEXT: ld %f3, 8(%r4)
; Z10-NEXT: cpsdr %f0, %f1, %f0
; Z10-NEXT: std %f0, 0(%r2)
; Z10-NEXT: std %f2, 8(%r2)
; Z10-NEXT: br %r14
;
; Z16-LABEL: f15:
; Z16: # %bb.0:
; Z16-NEXT: larl %r1, .LCPI15_0
; Z16-NEXT: vl %v1, 0(%r4), 3
; Z16-NEXT: vl %v2, 0(%r1), 3
; Z16-NEXT: vl %v0, 0(%r3), 3
; Z16-NEXT: vtm %v1, %v2
; Z16-NEXT: je .LBB15_2
; Z16-NEXT: # %bb.1:
; Z16-NEXT: wflnxb %v0, %v0
; Z16-NEXT: vst %v0, 0(%r2), 3
; Z16-NEXT: br %r14
; Z16-NEXT: .LBB15_2:
; Z16-NEXT: wflpxb %v0, %v0
; Z16-NEXT: vst %v0, 0(%r2), 3
; Z16-NEXT: br %r14
%res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b)
ret fp128 %res
}
Loading