Skip to content

Commit 94a14f9

Browse files
authored
[SystemZ] Add DAGCombine for FCOPYSIGN to remove rounding. (#136131)
Add a DAGCombine for FCOPYSIGN that removes an FP_ROUND of the sign operand. The rounding is never needed, because the sign bit is already in the correct place. This helps in particular when rounding to f16, which needs a libcall. Also remove the roundings for the other FP VTs and simplify the CPSDR patterns correspondingly. The fp-copysign-03.ll test is updated and now also covers the other FP VT combinations.
1 parent 55066b8 commit 94a14f9

File tree

6 files changed

+215
-51
lines changed

6 files changed

+215
-51
lines changed

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -792,6 +792,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
792792
ISD::SINT_TO_FP,
793793
ISD::UINT_TO_FP,
794794
ISD::STRICT_FP_EXTEND,
795+
ISD::FCOPYSIGN,
795796
ISD::BSWAP,
796797
ISD::SETCC,
797798
ISD::SRL,
@@ -8548,6 +8549,22 @@ SDValue SystemZTargetLowering::combineINT_TO_FP(
85488549
return SDValue();
85498550
}
85508551

8552+
// Target DAG combine for ISD::FCOPYSIGN nodes.
//
// N is the FCOPYSIGN node: operand 0 supplies the value and operand 1
// supplies the sign. If the sign operand is an ISD::FP_ROUND, a new FCOPYSIGN
// of the same result type is built that takes the sign directly from the
// un-rounded input of that FP_ROUND. Returns the new node, or an empty SDValue
// when the sign operand is not an FP_ROUND (i.e. nothing was combined).
SDValue SystemZTargetLowering::combineFCOPYSIGN(
8553+
SDNode *N, DAGCombinerInfo &DCI) const {
8554+
SelectionDAG &DAG = DCI.DAG;
8555+
EVT VT = N->getValueType(0);
8556+
SDValue ValOp = N->getOperand(0);
8557+
SDValue SignOp = N->getOperand(1);
8558+
8559+
// Remove the rounding of the sign operand, which is not needed: FCOPYSIGN
// only takes the sign from this operand, so the wider un-rounded value can
// be used as the sign source directly.
8560+
if (SignOp.getOpcode() == ISD::FP_ROUND) {
8561+
SDValue WideOp = SignOp.getOperand(0);
8562+
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, ValOp, WideOp);
8563+
}
8564+
8565+
// No FP_ROUND on the sign operand: leave the node unchanged.
return SDValue();
8566+
}
8567+
85518568
SDValue SystemZTargetLowering::combineBSWAP(
85528569
SDNode *N, DAGCombinerInfo &DCI) const {
85538570
SelectionDAG &DAG = DCI.DAG;
@@ -9137,6 +9154,7 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
91379154
case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
91389155
case ISD::SINT_TO_FP:
91399156
case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
9157+
case ISD::FCOPYSIGN: return combineFCOPYSIGN(N, DCI);
91409158
case ISD::BSWAP: return combineBSWAP(N, DCI);
91419159
case ISD::SETCC: return combineSETCC(N, DCI);
91429160
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);

llvm/lib/Target/SystemZ/SystemZISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -777,6 +777,7 @@ class SystemZTargetLowering : public TargetLowering {
777777
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
778778
SDValue combineFP_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
779779
SDValue combineINT_TO_FP(SDNode *N, DAGCombinerInfo &DCI) const;
780+
SDValue combineFCOPYSIGN(SDNode *N, DAGCombinerInfo &DCI) const;
780781
SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
781782
SDValue combineSETCC(SDNode *N, DAGCombinerInfo &DCI) const;
782783
SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;

llvm/lib/Target/SystemZ/SystemZInstrFP.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,10 +109,10 @@ let isCodeGenOnly = 1 in {
109109

110110
// The sign of an FP128 is in the high register.
111111
let Predicates = [FeatureNoVectorEnhancements1] in
112-
def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 FP128:$src2)))),
112+
def : Pat<(fcopysign FP32:$src1, (f128 FP128:$src2)),
113113
(CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
114114
let Predicates = [FeatureVectorEnhancements1] in
115-
def : Pat<(fcopysign FP32:$src1, (f32 (fpround (f128 VR128:$src2)))),
115+
def : Pat<(fcopysign FP32:$src1, (f128 VR128:$src2)),
116116
(CPSDRsd FP32:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>;
117117

118118
// fcopysign with an FP64 result.
@@ -124,10 +124,10 @@ def CPSDRdd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP64>;
124124

125125
// The sign of an FP128 is in the high register.
126126
let Predicates = [FeatureNoVectorEnhancements1] in
127-
def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 FP128:$src2)))),
127+
def : Pat<(fcopysign FP64:$src1, (f128 FP128:$src2)),
128128
(CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
129129
let Predicates = [FeatureVectorEnhancements1] in
130-
def : Pat<(fcopysign FP64:$src1, (f64 (fpround (f128 VR128:$src2)))),
130+
def : Pat<(fcopysign FP64:$src1, (f128 VR128:$src2)),
131131
(CPSDRdd FP64:$src1, (EXTRACT_SUBREG VR128:$src2, subreg_h64))>;
132132

133133
// fcopysign with an FP128 result. Use "upper" as the high half and leave

llvm/test/CodeGen/SystemZ/fp-copysign-01.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; Test copysign operations.
1+
; Test copysign libcalls.
22
;
33
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
44

llvm/test/CodeGen/SystemZ/fp-copysign-02.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; Test f128 copysign operations on z14.
1+
; Test f128 copysign libcalls on z14.
22
;
33
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
44

llvm/test/CodeGen/SystemZ/fp-copysign-03.ll

Lines changed: 190 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 \
44
; RUN: | FileCheck %s --check-prefixes=CHECK,Z16
55
;
6-
; Test copysign intrinsics with half.
6+
; Test copysign intrinsics.
77

88
declare half @llvm.copysign.f16(half, half)
99
declare float @llvm.copysign.f32(float, float)
@@ -43,53 +43,25 @@ define half @f2(half %a, double %b) {
4343
}
4444

4545
; Test copysign with an f16 result and f128 sign argument.
46-
; TODO: Let the DAGCombiner remove the fp_round.
4746
define half @f3(half %a, fp128 %b) {
4847
; Z10-LABEL: f3:
4948
; Z10: # %bb.0:
50-
; Z10-NEXT: stmg %r14, %r15, 112(%r15)
51-
; Z10-NEXT: .cfi_offset %r14, -48
52-
; Z10-NEXT: .cfi_offset %r15, -40
53-
; Z10-NEXT: aghi %r15, -184
54-
; Z10-NEXT: .cfi_def_cfa_offset 344
55-
; Z10-NEXT: std %f8, 176(%r15) # 8-byte Spill
56-
; Z10-NEXT: .cfi_offset %f8, -168
5749
; Z10-NEXT: ld %f1, 0(%r2)
5850
; Z10-NEXT: ld %f3, 8(%r2)
59-
; Z10-NEXT: ler %f8, %f0
60-
; Z10-NEXT: la %r2, 160(%r15)
61-
; Z10-NEXT: std %f1, 160(%r15)
62-
; Z10-NEXT: std %f3, 168(%r15)
63-
; Z10-NEXT: brasl %r14, __trunctfhf2@PLT
64-
; Z10-NEXT: cpsdr %f0, %f0, %f8
65-
; Z10-NEXT: ld %f8, 176(%r15) # 8-byte Reload
66-
; Z10-NEXT: lmg %r14, %r15, 296(%r15)
51+
; Z10-NEXT: cpsdr %f0, %f1, %f0
6752
; Z10-NEXT: br %r14
6853
;
6954
; Z16-LABEL: f3:
7055
; Z16: # %bb.0:
71-
; Z16-NEXT: stmg %r14, %r15, 112(%r15)
72-
; Z16-NEXT: .cfi_offset %r14, -48
73-
; Z16-NEXT: .cfi_offset %r15, -40
74-
; Z16-NEXT: aghi %r15, -184
75-
; Z16-NEXT: .cfi_def_cfa_offset 344
76-
; Z16-NEXT: std %f8, 176(%r15) # 8-byte Spill
77-
; Z16-NEXT: .cfi_offset %f8, -168
78-
; Z16-NEXT: ldr %f8, %f0
79-
; Z16-NEXT: vl %v0, 0(%r2), 3
80-
; Z16-NEXT: la %r2, 160(%r15)
81-
; Z16-NEXT: vst %v0, 160(%r15), 3
82-
; Z16-NEXT: brasl %r14, __trunctfhf2@PLT
83-
; Z16-NEXT: cpsdr %f0, %f0, %f8
84-
; Z16-NEXT: ld %f8, 176(%r15) # 8-byte Reload
85-
; Z16-NEXT: lmg %r14, %r15, 296(%r15)
56+
; Z16-NEXT: vl %v1, 0(%r2), 3
57+
; Z16-NEXT: cpsdr %f0, %f1, %f0
8658
; Z16-NEXT: br %r14
8759
%bh = fptrunc fp128 %b to half
8860
%res = call half @llvm.copysign.f16(half %a, half %bh)
8961
ret half %res
9062
}
9163

92-
; Test copysign with an f32 result and half sign argument.
64+
; Test copysign with an f32 result and f16 sign argument.
9365
define float @f4(float %a, half %b) {
9466
; CHECK-LABEL: f4:
9567
; CHECK: # %bb.0:
@@ -100,20 +72,100 @@ define float @f4(float %a, half %b) {
10072
ret float %res
10173
}
10274

103-
; Test copysign with an f64 result and half sign argument.
104-
define double @f5(double %a, half %b) {
75+
; Test copysign with an f32 result and f32 sign argument.
76+
define float @f5(float %a, float %b) {
10577
; CHECK-LABEL: f5:
10678
; CHECK: # %bb.0:
10779
; CHECK-NEXT: cpsdr %f0, %f2, %f0
80+
; CHECK-NEXT: br %r14
81+
%res = call float @llvm.copysign.f32(float %a, float %b)
82+
ret float %res
83+
}
84+
85+
; Test copysign with an f32 result and f64 sign argument.
86+
define float @f6(float %a, double %b) {
87+
; CHECK-LABEL: f6:
88+
; CHECK: # %bb.0:
89+
; CHECK-NEXT: cpsdr %f0, %f2, %f0
90+
; CHECK-NEXT: br %r14
91+
%bf = fptrunc double %b to float
92+
%res = call float @llvm.copysign.f32(float %a, float %bf)
93+
ret float %res
94+
}
95+
96+
; Test copysign with an f32 result and f128 sign argument.
97+
define float @f7(float %a, fp128 %b) {
98+
; Z10-LABEL: f7:
99+
; Z10: # %bb.0:
100+
; Z10-NEXT: ld %f1, 0(%r2)
101+
; Z10-NEXT: ld %f3, 8(%r2)
102+
; Z10-NEXT: cpsdr %f0, %f1, %f0
103+
; Z10-NEXT: br %r14
104+
;
105+
; Z16-LABEL: f7:
106+
; Z16: # %bb.0:
107+
; Z16-NEXT: vl %v1, 0(%r2), 3
108+
; Z16-NEXT: cpsdr %f0, %f1, %f0
109+
; Z16-NEXT: br %r14
110+
%bf = fptrunc fp128 %b to float
111+
%res = call float @llvm.copysign.f32(float %a, float %bf)
112+
ret float %res
113+
}
114+
115+
; Test copysign with an f64 result and f16 sign argument.
116+
define double @f8(double %a, half %b) {
117+
; CHECK-LABEL: f8:
118+
; CHECK: # %bb.0:
119+
; CHECK-NEXT: cpsdr %f0, %f2, %f0
108120
; CHECK-NEXT: br %r14
109121
%bd = fpext half %b to double
110122
%res = call double @llvm.copysign.f64(double %a, double %bd)
111123
ret double %res
112124
}
113125

114-
; Test copysign with an f128 result and half sign argument.
115-
define fp128 @f6(fp128 %a, half %b) {
116-
; Z10-LABEL: f6:
126+
; Test copysign with an f64 result and f32 sign argument.
127+
define double @f9(double %a, float %b) {
128+
; CHECK-LABEL: f9:
129+
; CHECK: # %bb.0:
130+
; CHECK-NEXT: cpsdr %f0, %f2, %f0
131+
; CHECK-NEXT: br %r14
132+
%bd = fpext float %b to double
133+
%res = call double @llvm.copysign.f64(double %a, double %bd)
134+
ret double %res
135+
}
136+
137+
; Test copysign with an f64 result and f64 sign argument.
138+
define double @f10(double %a, double %b) {
139+
; CHECK-LABEL: f10:
140+
; CHECK: # %bb.0:
141+
; CHECK-NEXT: cpsdr %f0, %f2, %f0
142+
; CHECK-NEXT: br %r14
143+
%res = call double @llvm.copysign.f64(double %a, double %b)
144+
ret double %res
145+
}
146+
147+
; Test copysign with an f64 result and f128 sign argument.
148+
define double @f11(double %a, fp128 %b) {
149+
; Z10-LABEL: f11:
150+
; Z10: # %bb.0:
151+
; Z10-NEXT: ld %f1, 0(%r2)
152+
; Z10-NEXT: ld %f3, 8(%r2)
153+
; Z10-NEXT: cpsdr %f0, %f1, %f0
154+
; Z10-NEXT: br %r14
155+
;
156+
; Z16-LABEL: f11:
157+
; Z16: # %bb.0:
158+
; Z16-NEXT: vl %v1, 0(%r2), 3
159+
; Z16-NEXT: cpsdr %f0, %f1, %f0
160+
; Z16-NEXT: br %r14
161+
%bd = fptrunc fp128 %b to double
162+
%res = call double @llvm.copysign.f64(double %a, double %bd)
163+
ret double %res
164+
}
165+
166+
; Test copysign with an f128 result and f16 sign argument.
167+
define fp128 @f12(fp128 %a, half %b) {
168+
; Z10-LABEL: f12:
117169
; Z10: # %bb.0:
118170
; Z10-NEXT: ld %f1, 0(%r3)
119171
; Z10-NEXT: ld %f3, 8(%r3)
@@ -122,24 +174,117 @@ define fp128 @f6(fp128 %a, half %b) {
122174
; Z10-NEXT: std %f3, 8(%r2)
123175
; Z10-NEXT: br %r14
124176
;
125-
; Z16-LABEL: f6:
177+
; Z16-LABEL: f12:
126178
; Z16: # %bb.0:
127179
; Z16-NEXT: aghi %r15, -168
128180
; Z16-NEXT: .cfi_def_cfa_offset 328
129181
; Z16-NEXT: vl %v1, 0(%r3), 3
130182
; Z16-NEXT: vsteh %v0, 164(%r15), 0
131183
; Z16-NEXT: tm 164(%r15), 128
132-
; Z16-NEXT: je .LBB6_2
184+
; Z16-NEXT: je .LBB12_2
133185
; Z16-NEXT: # %bb.1:
134186
; Z16-NEXT: wflnxb %v0, %v1
135-
; Z16-NEXT: j .LBB6_3
136-
; Z16-NEXT: .LBB6_2:
187+
; Z16-NEXT: j .LBB12_3
188+
; Z16-NEXT: .LBB12_2:
137189
; Z16-NEXT: wflpxb %v0, %v1
138-
; Z16-NEXT: .LBB6_3:
190+
; Z16-NEXT: .LBB12_3:
139191
; Z16-NEXT: vst %v0, 0(%r2), 3
140192
; Z16-NEXT: aghi %r15, 168
141193
; Z16-NEXT: br %r14
142-
%bd = fpext half %b to fp128
143-
%res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %bd)
194+
%b128 = fpext half %b to fp128
195+
%res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b128)
196+
ret fp128 %res
197+
}
198+
199+
; Test copysign with an f128 result and f32 sign argument.
200+
define fp128 @f13(fp128 %a, float %b) {
201+
; Z10-LABEL: f13:
202+
; Z10: # %bb.0:
203+
; Z10-NEXT: ld %f1, 0(%r3)
204+
; Z10-NEXT: ld %f3, 8(%r3)
205+
; Z10-NEXT: cpsdr %f1, %f0, %f1
206+
; Z10-NEXT: std %f1, 0(%r2)
207+
; Z10-NEXT: std %f3, 8(%r2)
208+
; Z10-NEXT: br %r14
209+
;
210+
; Z16-LABEL: f13:
211+
; Z16: # %bb.0:
212+
; Z16-NEXT: vl %v1, 0(%r3), 3
213+
; Z16-NEXT: vlgvf %r0, %v0, 0
214+
; Z16-NEXT: tmlh %r0, 32768
215+
; Z16-NEXT: je .LBB13_2
216+
; Z16-NEXT: # %bb.1:
217+
; Z16-NEXT: wflnxb %v0, %v1
218+
; Z16-NEXT: vst %v0, 0(%r2), 3
219+
; Z16-NEXT: br %r14
220+
; Z16-NEXT: .LBB13_2:
221+
; Z16-NEXT: wflpxb %v0, %v1
222+
; Z16-NEXT: vst %v0, 0(%r2), 3
223+
; Z16-NEXT: br %r14
224+
%b128 = fpext float %b to fp128
225+
%res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b128)
226+
ret fp128 %res
227+
}
228+
229+
; Test copysign with an f128 result and f64 sign argument.
230+
define fp128 @f14(fp128 %a, double %b) {
231+
; Z10-LABEL: f14:
232+
; Z10: # %bb.0:
233+
; Z10-NEXT: ld %f1, 0(%r3)
234+
; Z10-NEXT: ld %f3, 8(%r3)
235+
; Z10-NEXT: cpsdr %f1, %f0, %f1
236+
; Z10-NEXT: std %f1, 0(%r2)
237+
; Z10-NEXT: std %f3, 8(%r2)
238+
; Z10-NEXT: br %r14
239+
;
240+
; Z16-LABEL: f14:
241+
; Z16: # %bb.0:
242+
; Z16-NEXT: vl %v1, 0(%r3), 3
243+
; Z16-NEXT: lgdr %r0, %f0
244+
; Z16-NEXT: tmhh %r0, 32768
245+
; Z16-NEXT: je .LBB14_2
246+
; Z16-NEXT: # %bb.1:
247+
; Z16-NEXT: wflnxb %v0, %v1
248+
; Z16-NEXT: vst %v0, 0(%r2), 3
249+
; Z16-NEXT: br %r14
250+
; Z16-NEXT: .LBB14_2:
251+
; Z16-NEXT: wflpxb %v0, %v1
252+
; Z16-NEXT: vst %v0, 0(%r2), 3
253+
; Z16-NEXT: br %r14
254+
%b128 = fpext double %b to fp128
255+
%res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b128)
256+
ret fp128 %res
257+
}
258+
259+
; Test copysign with an f128 result and f128 sign argument.
260+
define fp128 @f15(fp128 %a, fp128 %b) {
261+
; Z10-LABEL: f15:
262+
; Z10: # %bb.0:
263+
; Z10-NEXT: ld %f0, 0(%r3)
264+
; Z10-NEXT: ld %f2, 8(%r3)
265+
; Z10-NEXT: ld %f1, 0(%r4)
266+
; Z10-NEXT: ld %f3, 8(%r4)
267+
; Z10-NEXT: cpsdr %f0, %f1, %f0
268+
; Z10-NEXT: std %f0, 0(%r2)
269+
; Z10-NEXT: std %f2, 8(%r2)
270+
; Z10-NEXT: br %r14
271+
;
272+
; Z16-LABEL: f15:
273+
; Z16: # %bb.0:
274+
; Z16-NEXT: larl %r1, .LCPI15_0
275+
; Z16-NEXT: vl %v1, 0(%r4), 3
276+
; Z16-NEXT: vl %v2, 0(%r1), 3
277+
; Z16-NEXT: vl %v0, 0(%r3), 3
278+
; Z16-NEXT: vtm %v1, %v2
279+
; Z16-NEXT: je .LBB15_2
280+
; Z16-NEXT: # %bb.1:
281+
; Z16-NEXT: wflnxb %v0, %v0
282+
; Z16-NEXT: vst %v0, 0(%r2), 3
283+
; Z16-NEXT: br %r14
284+
; Z16-NEXT: .LBB15_2:
285+
; Z16-NEXT: wflpxb %v0, %v0
286+
; Z16-NEXT: vst %v0, 0(%r2), 3
287+
; Z16-NEXT: br %r14
288+
%res = call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b)
144289
ret fp128 %res
145290
}

0 commit comments

Comments
 (0)