Skip to content

Commit 5fd0d86

Browse files
[PowerPC][SelectionDAG] Expand @llvm.copysign.ppc_fp128 without copysignl
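Expanding the operation inline during float type legalization removes the last in-tree user of the copysignl libcall, which allows ripping out the remaining copysignl infrastructure (the COPYSIGN_PPCF128 runtime libcall entry and the IntrinsicLowering fallback).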
1 parent e277a8e commit 5fd0d86

File tree

6 files changed

+161
-222
lines changed

6 files changed

+161
-222
lines changed

llvm/include/llvm/IR/RuntimeLibcalls.def

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,6 @@ HANDLE_LIBCALL(FLOOR_F64, "floor")
289289
HANDLE_LIBCALL(FLOOR_F80, "floorl")
290290
HANDLE_LIBCALL(FLOOR_F128, "floorl")
291291
HANDLE_LIBCALL(FLOOR_PPCF128, "floorl")
292-
HANDLE_LIBCALL(COPYSIGN_PPCF128, "copysignl")
293292
HANDLE_LIBCALL(FMIN_F32, "fminf")
294293
HANDLE_LIBCALL(FMIN_F64, "fmin")
295294
HANDLE_LIBCALL(FMIN_F80, "fminl")

llvm/lib/CodeGen/IntrinsicLowering.cpp

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -437,18 +437,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
437437
ReplaceFPIntrinsicWithCall(CI, "roundevenf", "roundeven", "roundevenl");
438438
break;
439439
}
440-
case Intrinsic::copysign: {
441-
switch (CI->getArgOperand(0)->getType()->getTypeID()) {
442-
default:
443-
report_fatal_error("copysign intrinsic without arch-specific floats "
444-
"reached intrinsic-to-libcall lowering");
445-
break;
446-
case Type::PPC_FP128TyID:
447-
ReplaceCallWith("copysignl", CI, CI->arg_begin(), CI->arg_end(),
448-
Type::getFloatTy(CI->getContext()));
449-
}
450-
break;
451-
}
452440
case Intrinsic::get_rounding:
453441
// Lower to "round to the nearest"
454442
if (!CI->getType()->isVoidTy())

llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1710,11 +1710,18 @@ void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
17101710
void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
17111711
SDValue &Lo, SDValue &Hi) {
17121712

1713-
EVT VT = N->getValueType(0);
1714-
ExpandFloatRes_Binary(
1715-
N,
1716-
(VT == MVT::ppcf128 ? RTLIB::COPYSIGN_PPCF128 : RTLIB::UNKNOWN_LIBCALL),
1717-
Lo, Hi);
1713+
assert(N->getValueType(0) == MVT::ppcf128 &&
1714+
"Logic only correct for ppcf128!");
1715+
SDLoc DL = SDLoc(N);
1716+
SDValue Tmp = SDValue();
1717+
GetExpandedFloat(N->getOperand(0), Lo, Tmp);
1718+
1719+
Hi = DAG.getNode(ISD::FCOPYSIGN, DL, Tmp.getValueType(), Tmp,
1720+
N->getOperand(1));
1721+
// A double-double is Hi + Lo, so if Hi flips sign, so must Lo
1722+
Lo = DAG.getSelectCC(DL, Tmp, Hi, Lo,
1723+
DAG.getNode(ISD::FNEG, DL, Lo.getValueType(), Lo),
1724+
ISD::SETEQ);
17181725
}
17191726

17201727
void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,

llvm/test/CodeGen/PowerPC/copysignl.ll

Lines changed: 45 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -43,31 +43,26 @@ declare double @copysign(double, double) #0
4343
define ppc_fp128 @foo_ll(double %a, ppc_fp128 %b) #0 {
4444
; CHECK-LABEL: foo_ll:
4545
; CHECK: # %bb.0: # %entry
46-
; CHECK-NEXT: mflr 0
47-
; CHECK-NEXT: stdu 1, -112(1)
48-
; CHECK-NEXT: fmr 3, 2
49-
; CHECK-NEXT: addis 3, 2, .LCPI2_0@toc@ha
50-
; CHECK-NEXT: std 0, 128(1)
51-
; CHECK-NEXT: lfs 2, .LCPI2_0@toc@l(3)
52-
; CHECK-NEXT: bl copysignl
53-
; CHECK-NEXT: nop
54-
; CHECK-NEXT: addi 1, 1, 112
55-
; CHECK-NEXT: ld 0, 16(1)
56-
; CHECK-NEXT: mtlr 0
46+
; CHECK-NEXT: fcpsgn 0, 2, 1
47+
; CHECK-NEXT: li 3, 0
48+
; CHECK-NEXT: li 4, 8
49+
; CHECK-NEXT: fcmpu 0, 1, 0
50+
; CHECK-NEXT: fmr 1, 0
51+
; CHECK-NEXT: iseleq 3, 4, 3
52+
; CHECK-NEXT: addis 4, 2, .LCPI2_0@toc@ha
53+
; CHECK-NEXT: addi 4, 4, .LCPI2_0@toc@l
54+
; CHECK-NEXT: lfdx 2, 4, 3
5755
; CHECK-NEXT: blr
5856
;
5957
; CHECK-VSX-LABEL: foo_ll:
6058
; CHECK-VSX: # %bb.0: # %entry
61-
; CHECK-VSX-NEXT: mflr 0
62-
; CHECK-VSX-NEXT: stdu 1, -112(1)
63-
; CHECK-VSX-NEXT: fmr 3, 2
59+
; CHECK-VSX-NEXT: fmr 0, 1
60+
; CHECK-VSX-NEXT: xscpsgndp 1, 2, 1
6461
; CHECK-VSX-NEXT: xxlxor 2, 2, 2
65-
; CHECK-VSX-NEXT: std 0, 128(1)
66-
; CHECK-VSX-NEXT: bl copysignl
67-
; CHECK-VSX-NEXT: nop
68-
; CHECK-VSX-NEXT: addi 1, 1, 112
69-
; CHECK-VSX-NEXT: ld 0, 16(1)
70-
; CHECK-VSX-NEXT: mtlr 0
62+
; CHECK-VSX-NEXT: xscmpudp 0, 0, 1
63+
; CHECK-VSX-NEXT: beqlr 0
64+
; CHECK-VSX-NEXT: # %bb.1: # %entry
65+
; CHECK-VSX-NEXT: xsnegdp 2, 2
7166
; CHECK-VSX-NEXT: blr
7267
entry:
7368
%conv = fpext double %a to ppc_fp128
@@ -78,31 +73,26 @@ entry:
7873
define ppc_fp128 @foo_ld(double %a, double %b) #0 {
7974
; CHECK-LABEL: foo_ld:
8075
; CHECK: # %bb.0: # %entry
81-
; CHECK-NEXT: mflr 0
82-
; CHECK-NEXT: stdu 1, -112(1)
83-
; CHECK-NEXT: fmr 3, 2
84-
; CHECK-NEXT: addis 3, 2, .LCPI3_0@toc@ha
85-
; CHECK-NEXT: std 0, 128(1)
86-
; CHECK-NEXT: lfs 2, .LCPI3_0@toc@l(3)
87-
; CHECK-NEXT: bl copysignl
88-
; CHECK-NEXT: nop
89-
; CHECK-NEXT: addi 1, 1, 112
90-
; CHECK-NEXT: ld 0, 16(1)
91-
; CHECK-NEXT: mtlr 0
76+
; CHECK-NEXT: fcpsgn 0, 2, 1
77+
; CHECK-NEXT: li 3, 0
78+
; CHECK-NEXT: li 4, 8
79+
; CHECK-NEXT: fcmpu 0, 1, 0
80+
; CHECK-NEXT: fmr 1, 0
81+
; CHECK-NEXT: iseleq 3, 4, 3
82+
; CHECK-NEXT: addis 4, 2, .LCPI3_0@toc@ha
83+
; CHECK-NEXT: addi 4, 4, .LCPI3_0@toc@l
84+
; CHECK-NEXT: lfdx 2, 4, 3
9285
; CHECK-NEXT: blr
9386
;
9487
; CHECK-VSX-LABEL: foo_ld:
9588
; CHECK-VSX: # %bb.0: # %entry
96-
; CHECK-VSX-NEXT: mflr 0
97-
; CHECK-VSX-NEXT: stdu 1, -112(1)
98-
; CHECK-VSX-NEXT: fmr 3, 2
89+
; CHECK-VSX-NEXT: fmr 0, 1
90+
; CHECK-VSX-NEXT: xscpsgndp 1, 2, 1
9991
; CHECK-VSX-NEXT: xxlxor 2, 2, 2
100-
; CHECK-VSX-NEXT: std 0, 128(1)
101-
; CHECK-VSX-NEXT: bl copysignl
102-
; CHECK-VSX-NEXT: nop
103-
; CHECK-VSX-NEXT: addi 1, 1, 112
104-
; CHECK-VSX-NEXT: ld 0, 16(1)
105-
; CHECK-VSX-NEXT: mtlr 0
92+
; CHECK-VSX-NEXT: xscmpudp 0, 0, 1
93+
; CHECK-VSX-NEXT: beqlr 0
94+
; CHECK-VSX-NEXT: # %bb.1: # %entry
95+
; CHECK-VSX-NEXT: xsnegdp 2, 2
10696
; CHECK-VSX-NEXT: blr
10797
entry:
10898
%conv = fpext double %a to ppc_fp128
@@ -114,31 +104,26 @@ entry:
114104
define ppc_fp128 @foo_lf(double %a, float %b) #0 {
115105
; CHECK-LABEL: foo_lf:
116106
; CHECK: # %bb.0: # %entry
117-
; CHECK-NEXT: mflr 0
118-
; CHECK-NEXT: stdu 1, -112(1)
119-
; CHECK-NEXT: fmr 3, 2
120-
; CHECK-NEXT: addis 3, 2, .LCPI4_0@toc@ha
121-
; CHECK-NEXT: std 0, 128(1)
122-
; CHECK-NEXT: lfs 2, .LCPI4_0@toc@l(3)
123-
; CHECK-NEXT: bl copysignl
124-
; CHECK-NEXT: nop
125-
; CHECK-NEXT: addi 1, 1, 112
126-
; CHECK-NEXT: ld 0, 16(1)
127-
; CHECK-NEXT: mtlr 0
107+
; CHECK-NEXT: fcpsgn 0, 2, 1
108+
; CHECK-NEXT: li 3, 0
109+
; CHECK-NEXT: li 4, 8
110+
; CHECK-NEXT: fcmpu 0, 1, 0
111+
; CHECK-NEXT: fmr 1, 0
112+
; CHECK-NEXT: iseleq 3, 4, 3
113+
; CHECK-NEXT: addis 4, 2, .LCPI4_0@toc@ha
114+
; CHECK-NEXT: addi 4, 4, .LCPI4_0@toc@l
115+
; CHECK-NEXT: lfdx 2, 4, 3
128116
; CHECK-NEXT: blr
129117
;
130118
; CHECK-VSX-LABEL: foo_lf:
131119
; CHECK-VSX: # %bb.0: # %entry
132-
; CHECK-VSX-NEXT: mflr 0
133-
; CHECK-VSX-NEXT: stdu 1, -112(1)
134-
; CHECK-VSX-NEXT: fmr 3, 2
120+
; CHECK-VSX-NEXT: fmr 0, 1
121+
; CHECK-VSX-NEXT: fcpsgn 1, 2, 1
135122
; CHECK-VSX-NEXT: xxlxor 2, 2, 2
136-
; CHECK-VSX-NEXT: std 0, 128(1)
137-
; CHECK-VSX-NEXT: bl copysignl
138-
; CHECK-VSX-NEXT: nop
139-
; CHECK-VSX-NEXT: addi 1, 1, 112
140-
; CHECK-VSX-NEXT: ld 0, 16(1)
141-
; CHECK-VSX-NEXT: mtlr 0
123+
; CHECK-VSX-NEXT: xscmpudp 0, 0, 1
124+
; CHECK-VSX-NEXT: beqlr 0
125+
; CHECK-VSX-NEXT: # %bb.1: # %entry
126+
; CHECK-VSX-NEXT: xsnegdp 2, 2
142127
; CHECK-VSX-NEXT: blr
143128
entry:
144129
%conv = fpext double %a to ppc_fp128

llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll

Lines changed: 53 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -4,84 +4,81 @@
44
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
55
target triple = "powerpc-unknown-linux-gnu"
66

7+
; Previously this test checked that a CTR-based loop was not formed around a libm call to copysignl,
8+
; but now that LLVM no longer emits copysignl in most cases, this test stands as a tombstone.
9+
; The output has mtctr right in the middle, but we don't care because copysignl is nowhere to be found.
10+
711
define ppc_fp128 @foo(ptr nocapture %n, ppc_fp128 %d) nounwind readonly {
812
; CHECK-LABEL: foo:
913
; CHECK-NOT: mtctr
1014
; CHECK: # %bb.0: # %entry
11-
; CHECK-NEXT: mflr 0
12-
; CHECK-NEXT: stwu 1, -176(1)
13-
; CHECK-NEXT: stw 0, 180(1)
14-
; CHECK-NEXT: stfd 2, 128(1)
15-
; CHECK-NEXT: lwz 3, 132(1)
16-
; CHECK-NEXT: stfd 1, 136(1)
17-
; CHECK-NEXT: stw 3, 148(1)
18-
; CHECK-NEXT: lwz 3, 128(1)
19-
; CHECK-NEXT: stfd 31, 168(1) # 8-byte Folded Spill
20-
; CHECK-NEXT: stw 3, 144(1)
21-
; CHECK-NEXT: lwz 3, 140(1)
22-
; CHECK-NEXT: lfd 0, 144(1)
23-
; CHECK-NEXT: stw 3, 156(1)
24-
; CHECK-NEXT: lwz 3, 136(1)
25-
; CHECK-NEXT: stw 30, 160(1) # 4-byte Folded Spill
26-
; CHECK-NEXT: li 30, 2048
27-
; CHECK-NEXT: stw 3, 152(1)
28-
; CHECK-NEXT: lfd 31, 152(1)
29-
; CHECK-NEXT: fmr 1, 31
15+
; CHECK-NEXT: stwu 1, -112(1)
16+
; CHECK-NEXT: stfd 2, 80(1)
17+
; CHECK-NEXT: li 3, 2048
18+
; CHECK-NEXT: lwz 4, 84(1)
19+
; CHECK-NEXT: stfd 1, 88(1)
20+
; CHECK-NEXT: stw 4, 100(1)
21+
; CHECK-NEXT: lwz 4, 80(1)
22+
; CHECK-NEXT: stw 4, 96(1)
23+
; CHECK-NEXT: lwz 4, 92(1)
24+
; CHECK-NEXT: lfd 1, 96(1)
25+
; CHECK-NEXT: stw 4, 108(1)
26+
; CHECK-NEXT: lwz 4, 88(1)
27+
; CHECK-NEXT: stw 4, 104(1)
28+
; CHECK-NEXT: lfd 0, 104(1)
29+
; CHECK-NEXT: mtctr 3
30+
; CHECK-NEXT: fmr 2, 0
31+
; CHECK-NEXT: b .LBB0_2
3032
; CHECK-NEXT: .LBB0_1: # %for.body
3133
; CHECK-NEXT: #
32-
; CHECK-NEXT: stfd 1, 64(1)
33-
; CHECK-NEXT: lwz 3, 68(1)
34-
; CHECK-NEXT: stfd 31, 88(1)
35-
; CHECK-NEXT: stw 3, 84(1)
36-
; CHECK-NEXT: lwz 3, 64(1)
37-
; CHECK-NEXT: stfd 0, 56(1)
38-
; CHECK-NEXT: stw 3, 80(1)
39-
; CHECK-NEXT: lwz 3, 92(1)
40-
; CHECK-NEXT: lfd 4, 96(1)
41-
; CHECK-NEXT: stw 3, 108(1)
42-
; CHECK-NEXT: lwz 3, 88(1)
43-
; CHECK-NEXT: lfd 1, 80(1)
44-
; CHECK-NEXT: stw 3, 104(1)
4534
; CHECK-NEXT: lwz 3, 60(1)
46-
; CHECK-NEXT: lfd 3, 104(1)
35+
; CHECK-NEXT: stfd 1, 48(1)
4736
; CHECK-NEXT: stw 3, 76(1)
4837
; CHECK-NEXT: lwz 3, 56(1)
4938
; CHECK-NEXT: stw 3, 72(1)
50-
; CHECK-NEXT: lfd 2, 72(1)
51-
; CHECK-NEXT: bl copysignl
52-
; CHECK-NEXT: stfd 2, 48(1)
53-
; CHECK-NEXT: addi 30, 30, -1
5439
; CHECK-NEXT: lwz 3, 52(1)
55-
; CHECK-NEXT: cmplwi 30, 0
56-
; CHECK-NEXT: stfd 1, 40(1)
57-
; CHECK-NEXT: stw 3, 116(1)
40+
; CHECK-NEXT: lfd 2, 72(1)
41+
; CHECK-NEXT: stw 3, 68(1)
5842
; CHECK-NEXT: lwz 3, 48(1)
59-
; CHECK-NEXT: stw 3, 112(1)
60-
; CHECK-NEXT: lwz 3, 44(1)
61-
; CHECK-NEXT: lfd 0, 112(1)
62-
; CHECK-NEXT: stw 3, 124(1)
63-
; CHECK-NEXT: lwz 3, 40(1)
64-
; CHECK-NEXT: stw 3, 120(1)
65-
; CHECK-NEXT: lfd 1, 120(1)
66-
; CHECK-NEXT: bc 12, 1, .LBB0_1
67-
; CHECK-NEXT: # %bb.2: # %for.end
68-
; CHECK-NEXT: stfd 1, 16(1)
43+
; CHECK-NEXT: stw 3, 64(1)
44+
; CHECK-NEXT: lfd 1, 64(1)
45+
; CHECK-NEXT: bdz .LBB0_7
46+
; CHECK-NEXT: .LBB0_2: # %for.body
47+
; CHECK-NEXT: #
48+
; CHECK-NEXT: stfd 0, 40(1)
49+
; CHECK-NEXT: lbz 3, 40(1)
50+
; CHECK-NEXT: srwi 3, 3, 7
51+
; CHECK-NEXT: andi. 3, 3, 1
52+
; CHECK-NEXT: bc 12, 1, .LBB0_4
53+
; CHECK-NEXT: # %bb.3: # %for.body
54+
; CHECK-NEXT: #
55+
; CHECK-NEXT: fabs 3, 2
56+
; CHECK-NEXT: b .LBB0_5
57+
; CHECK-NEXT: .LBB0_4:
58+
; CHECK-NEXT: fnabs 3, 2
59+
; CHECK-NEXT: .LBB0_5: # %for.body
60+
; CHECK-NEXT: #
61+
; CHECK-NEXT: fcmpu 0, 2, 3
62+
; CHECK-NEXT: stfd 3, 56(1)
63+
; CHECK-NEXT: beq 0, .LBB0_1
64+
; CHECK-NEXT: # %bb.6: # %for.body
65+
; CHECK-NEXT: #
66+
; CHECK-NEXT: fneg 1, 1
67+
; CHECK-NEXT: b .LBB0_1
68+
; CHECK-NEXT: .LBB0_7: # %for.end
69+
; CHECK-NEXT: stfd 2, 16(1)
6970
; CHECK-NEXT: lwz 3, 20(1)
70-
; CHECK-NEXT: stfd 0, 8(1)
71+
; CHECK-NEXT: stfd 1, 8(1)
7172
; CHECK-NEXT: stw 3, 36(1)
7273
; CHECK-NEXT: lwz 3, 16(1)
73-
; CHECK-NEXT: lfd 31, 168(1) # 8-byte Folded Reload
7474
; CHECK-NEXT: stw 3, 32(1)
7575
; CHECK-NEXT: lwz 3, 12(1)
7676
; CHECK-NEXT: lfd 1, 32(1)
7777
; CHECK-NEXT: stw 3, 28(1)
7878
; CHECK-NEXT: lwz 3, 8(1)
79-
; CHECK-NEXT: lwz 30, 160(1) # 4-byte Folded Reload
8079
; CHECK-NEXT: stw 3, 24(1)
8180
; CHECK-NEXT: lfd 2, 24(1)
82-
; CHECK-NEXT: lwz 0, 180(1)
83-
; CHECK-NEXT: addi 1, 1, 176
84-
; CHECK-NEXT: mtlr 0
81+
; CHECK-NEXT: addi 1, 1, 112
8582
; CHECK-NEXT: blr
8683
entry:
8784
br label %for.body
@@ -103,4 +100,3 @@ for.end: ; preds = %for.body
103100
declare ppc_fp128 @copysignl(ppc_fp128, ppc_fp128) #0
104101

105102
; CHECK-NOT: mtctr
106-

0 commit comments

Comments
 (0)