Skip to content

Commit 95cf66d

Browse files
committed
[DAGCombiner] Remove explicit call to AddToWorklist in sqrt and reciprocal computations
Summary: These nodes end up being processed regardless due to DAGCombiner ensuring arguments are processed. This changes the order in which nodes are processed, which fixes an issue on PowerPC. Reviewers: craig.topper, efriedma, RKSimon, lebedev.ri, mcberg2017, stefanp, hfinkel Subscribers: nemanjai, MaskRay, jsji, steven.zhang, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D66548 llvm-svn: 369662
1 parent c9649eb commit 95cf66d

File tree

2 files changed

+10
-56
lines changed

2 files changed

+10
-56
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -20084,16 +20084,9 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
2008420084
// Newton iterations: Est = Est + Est (1 - Arg * Est)
2008520085
for (int i = 0; i < Iterations; ++i) {
2008620086
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
20087-
AddToWorklist(NewEst.getNode());
20088-
2008920087
NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
20090-
AddToWorklist(NewEst.getNode());
20091-
2009220088
NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
20093-
AddToWorklist(NewEst.getNode());
20094-
2009520089
Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
20096-
AddToWorklist(Est.getNode());
2009720090
}
2009820091
}
2009920092
return Est;
@@ -20118,31 +20111,19 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
2011820111
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
2011920112
// this entire sequence requires only one FP constant.
2012020113
SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
20121-
AddToWorklist(HalfArg.getNode());
20122-
2012320114
HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
20124-
AddToWorklist(HalfArg.getNode());
2012520115

2012620116
// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
2012720117
for (unsigned i = 0; i < Iterations; ++i) {
2012820118
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
20129-
AddToWorklist(NewEst.getNode());
20130-
2013120119
NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
20132-
AddToWorklist(NewEst.getNode());
20133-
2013420120
NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
20135-
AddToWorklist(NewEst.getNode());
20136-
2013720121
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
20138-
AddToWorklist(Est.getNode());
2013920122
}
2014020123

2014120124
// If non-reciprocal square root is requested, multiply the result by Arg.
20142-
if (!Reciprocal) {
20125+
if (!Reciprocal)
2014320126
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
20144-
AddToWorklist(Est.getNode());
20145-
}
2014620127

2014720128
return Est;
2014820129
}
@@ -20168,13 +20149,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
2016820149
// E = (E * -0.5) * ((A * E) * E + -3.0)
2016920150
for (unsigned i = 0; i < Iterations; ++i) {
2017020151
SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
20171-
AddToWorklist(AE.getNode());
20172-
2017320152
SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
20174-
AddToWorklist(AEE.getNode());
20175-
2017620153
SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
20177-
AddToWorklist(RHS.getNode());
2017820154

2017920155
// When calculating a square root at the last iteration build:
2018020156
// S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
@@ -20187,10 +20163,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
2018720163
// SQRT: LHS = (A * E) * -0.5
2018820164
LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
2018920165
}
20190-
AddToWorklist(LHS.getNode());
2019120166

2019220167
Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
20193-
AddToWorklist(Est.getNode());
2019420168
}
2019520169

2019620170
return Est;
@@ -20247,16 +20221,11 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
2024720221
SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
2024820222
SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
2024920223
Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
20250-
AddToWorklist(Fabs.getNode());
20251-
AddToWorklist(IsDenorm.getNode());
20252-
AddToWorklist(Est.getNode());
2025320224
} else {
2025420225
// X == 0.0 ? 0.0 : Est
2025520226
SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
2025620227
SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
2025720228
Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
20258-
AddToWorklist(IsZero.getNode());
20259-
AddToWorklist(Est.getNode());
2026020229
}
2026120230
}
2026220231
}

llvm/test/CodeGen/PowerPC/qpx-recipest.ll

Lines changed: 9 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -57,21 +57,16 @@ entry:
5757
ret <4 x double> %r
5858
}
5959

60-
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
61-
; an qvfmadd instead of a qvfnmsubs
6260
define <4 x double> @foof_fmf(<4 x double> %a, <4 x float> %b) nounwind {
6361
; CHECK-LABEL: foof_fmf:
6462
; CHECK: # %bb.0: # %entry
6563
; CHECK-NEXT: addis 3, 2, .LCPI2_0@toc@ha
6664
; CHECK-NEXT: qvfrsqrtes 3, 2
6765
; CHECK-NEXT: addi 3, 3, .LCPI2_0@toc@l
6866
; CHECK-NEXT: qvlfsx 0, 0, 3
69-
; CHECK-NEXT: addis 3, 2, .LCPI2_1@toc@ha
70-
; CHECK-NEXT: addi 3, 3, .LCPI2_1@toc@l
71-
; CHECK-NEXT: qvlfsx 4, 0, 3
72-
; CHECK-NEXT: qvfmadds 0, 2, 0, 2
73-
; CHECK-NEXT: qvfmuls 2, 3, 3
74-
; CHECK-NEXT: qvfmadds 0, 0, 2, 4
67+
; CHECK-NEXT: qvfmuls 4, 3, 3
68+
; CHECK-NEXT: qvfnmsubs 2, 2, 0, 2
69+
; CHECK-NEXT: qvfmadds 0, 2, 4, 0
7570
; CHECK-NEXT: qvfmuls 0, 3, 0
7671
; CHECK-NEXT: qvfmul 1, 1, 0
7772
; CHECK-NEXT: blr
@@ -179,21 +174,16 @@ entry:
179174
ret <4 x float> %r
180175
}
181176

182-
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
183-
; an qvfmadd instead of a qvfnmsubs
184177
define <4 x float> @goo_fmf(<4 x float> %a, <4 x float> %b) nounwind {
185178
; CHECK-LABEL: goo_fmf:
186179
; CHECK: # %bb.0: # %entry
187180
; CHECK-NEXT: addis 3, 2, .LCPI6_0@toc@ha
188181
; CHECK-NEXT: qvfrsqrtes 3, 2
189182
; CHECK-NEXT: addi 3, 3, .LCPI6_0@toc@l
190183
; CHECK-NEXT: qvlfsx 0, 0, 3
191-
; CHECK-NEXT: addis 3, 2, .LCPI6_1@toc@ha
192-
; CHECK-NEXT: addi 3, 3, .LCPI6_1@toc@l
193-
; CHECK-NEXT: qvlfsx 4, 0, 3
194-
; CHECK-NEXT: qvfmadds 0, 2, 0, 2
195-
; CHECK-NEXT: qvfmuls 2, 3, 3
196-
; CHECK-NEXT: qvfmadds 0, 0, 2, 4
184+
; CHECK-NEXT: qvfmuls 4, 3, 3
185+
; CHECK-NEXT: qvfnmsubs 2, 2, 0, 2
186+
; CHECK-NEXT: qvfmadds 0, 2, 4, 0
197187
; CHECK-NEXT: qvfmuls 0, 3, 0
198188
; CHECK-NEXT: qvfmuls 1, 1, 0
199189
; CHECK-NEXT: blr
@@ -360,23 +350,18 @@ entry:
360350
ret <4 x double> %r
361351
}
362352

363-
; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
364-
; an qvfmadds instead of a qvfnmsubs
365353
define <4 x float> @goo3_fmf(<4 x float> %a) nounwind {
366354
; CHECK-LABEL: goo3_fmf:
367355
; CHECK: # %bb.0: # %entry
368356
; CHECK-NEXT: addis 3, 2, .LCPI14_1@toc@ha
369357
; CHECK-NEXT: qvfrsqrtes 2, 1
370358
; CHECK-NEXT: addi 3, 3, .LCPI14_1@toc@l
371359
; CHECK-NEXT: qvlfsx 0, 0, 3
372-
; CHECK-NEXT: addis 3, 2, .LCPI14_2@toc@ha
373-
; CHECK-NEXT: addi 3, 3, .LCPI14_2@toc@l
374-
; CHECK-NEXT: qvlfsx 3, 0, 3
375360
; CHECK-NEXT: addis 3, 2, .LCPI14_0@toc@ha
376-
; CHECK-NEXT: qvfmuls 4, 2, 2
377361
; CHECK-NEXT: addi 3, 3, .LCPI14_0@toc@l
378-
; CHECK-NEXT: qvfmadds 0, 1, 0, 1
379-
; CHECK-NEXT: qvfmadds 0, 0, 4, 3
362+
; CHECK-NEXT: qvfmuls 4, 2, 2
363+
; CHECK-NEXT: qvfnmsubs 3, 1, 0, 1
364+
; CHECK-NEXT: qvfmadds 0, 3, 4, 0
380365
; CHECK-NEXT: qvlfsx 3, 0, 3
381366
; CHECK-NEXT: qvfmuls 0, 2, 0
382367
; CHECK-NEXT: qvfmuls 0, 0, 1

0 commit comments

Comments
 (0)