Skip to content

Commit 89d48cc

Browse files
committed
AMDGPU: Fix not emitting nofpexcept on fdiv expansion
In this awkward case, we have to emit custom pseudo-constrained FP wrappers. InstrEmitter concludes that since a mayRaiseFPException instruction had a chain, it can't add nofpexcept. Test deferred until mayRaiseFPException is really set on everything.
1 parent 11c617c commit 89d48cc

File tree

1 file changed

+35
-20
lines changed

1 file changed

+35
-20
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 35 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7907,9 +7907,10 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
79077907
}
79087908

79097909
static SDValue getFPBinOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
7910-
EVT VT, SDValue A, SDValue B, SDValue GlueChain) {
7910+
EVT VT, SDValue A, SDValue B, SDValue GlueChain,
7911+
SDNodeFlags Flags) {
79117912
if (GlueChain->getNumValues() <= 1) {
7912-
return DAG.getNode(Opcode, SL, VT, A, B);
7913+
return DAG.getNode(Opcode, SL, VT, A, B, Flags);
79137914
}
79147915

79157916
assert(GlueChain->getNumValues() == 3);
@@ -7922,15 +7923,16 @@ static SDValue getFPBinOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
79227923
break;
79237924
}
79247925

7925-
return DAG.getNode(Opcode, SL, VTList, GlueChain.getValue(1), A, B,
7926-
GlueChain.getValue(2));
7926+
return DAG.getNode(Opcode, SL, VTList,
7927+
{GlueChain.getValue(1), A, B, GlueChain.getValue(2)},
7928+
Flags);
79277929
}
79287930

79297931
static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
79307932
EVT VT, SDValue A, SDValue B, SDValue C,
7931-
SDValue GlueChain) {
7933+
SDValue GlueChain, SDNodeFlags Flags) {
79327934
if (GlueChain->getNumValues() <= 1) {
7933-
return DAG.getNode(Opcode, SL, VT, A, B, C);
7935+
return DAG.getNode(Opcode, SL, VT, {A, B, C}, Flags);
79347936
}
79357937

79367938
assert(GlueChain->getNumValues() == 3);
@@ -7943,8 +7945,9 @@ static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
79437945
break;
79447946
}
79457947

7946-
return DAG.getNode(Opcode, SL, VTList, GlueChain.getValue(1), A, B, C,
7947-
GlueChain.getValue(2));
7948+
return DAG.getNode(Opcode, SL, VTList,
7949+
{GlueChain.getValue(1), A, B, C, GlueChain.getValue(2)},
7950+
Flags);
79487951
}
79497952

79507953
SDValue SITargetLowering::LowerFDIV16(SDValue Op, SelectionDAG &DAG) const {
@@ -8018,6 +8021,13 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
80188021
if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
80198022
return FastLowered;
80208023

8024+
// The selection matcher assumes that any node with a chain that selects to a
8025+
// mayRaiseFPException machine instruction may raise an FP exception. Since
8026+
// we're introducing a chain here, we need to explicitly report nofpexcept
8027+
// for the regular fdiv lowering.
8028+
SDNodeFlags Flags = Op->getFlags();
8029+
Flags.setNoFPExcept(true);
8030+
80218031
SDLoc SL(Op);
80228032
SDValue LHS = Op.getOperand(0);
80238033
SDValue RHS = Op.getOperand(1);
@@ -8027,15 +8037,15 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
80278037
SDVTList ScaleVT = DAG.getVTList(MVT::f32, MVT::i1);
80288038

80298039
SDValue DenominatorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT,
8030-
RHS, RHS, LHS);
8040+
{RHS, RHS, LHS}, Flags);
80318041
SDValue NumeratorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT,
8032-
LHS, RHS, LHS);
8042+
{LHS, RHS, LHS}, Flags);
80338043

80348044
// Denominator is scaled to not be denormal, so using rcp is ok.
80358045
SDValue ApproxRcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32,
8036-
DenominatorScaled);
8046+
DenominatorScaled, Flags);
80378047
SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32,
8038-
DenominatorScaled);
8048+
DenominatorScaled, Flags);
80398049

80408050
const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE |
80418051
(4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
@@ -8045,6 +8055,10 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
80458055
const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction());
80468056

80478057
if (!HasFP32Denormals) {
8058+
// Note we can't use the STRICT_FMA/STRICT_FMUL for the non-strict FDIV
8059+
// lowering. The chain dependence is insufficient, and we need glue. We do
8060+
// not need the glue variants in a strictfp function.
8061+
80488062
SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
80498063

80508064
SDNode *EnableDenorm;
@@ -8072,21 +8086,22 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
80728086
}
80738087

80748088
SDValue Fma0 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0,
8075-
ApproxRcp, One, NegDivScale0);
8089+
ApproxRcp, One, NegDivScale0, Flags);
80768090

80778091
SDValue Fma1 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, Fma0, ApproxRcp,
8078-
ApproxRcp, Fma0);
8092+
ApproxRcp, Fma0, Flags);
80798093

80808094
SDValue Mul = getFPBinOp(DAG, ISD::FMUL, SL, MVT::f32, NumeratorScaled,
8081-
Fma1, Fma1);
8095+
Fma1, Fma1, Flags);
80828096

80838097
SDValue Fma2 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Mul,
8084-
NumeratorScaled, Mul);
8098+
NumeratorScaled, Mul, Flags);
80858099

8086-
SDValue Fma3 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, Fma2, Fma1, Mul, Fma2);
8100+
SDValue Fma3 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32,
8101+
Fma2, Fma1, Mul, Fma2, Flags);
80878102

80888103
SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3,
8089-
NumeratorScaled, Fma3);
8104+
NumeratorScaled, Fma3, Flags);
80908105

80918106
if (!HasFP32Denormals) {
80928107
SDNode *DisableDenorm;
@@ -8113,9 +8128,9 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
81138128

81148129
SDValue Scale = NumeratorScaled.getValue(1);
81158130
SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f32,
8116-
Fma4, Fma1, Fma3, Scale);
8131+
{Fma4, Fma1, Fma3, Scale}, Flags);
81178132

8118-
return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, RHS, LHS);
8133+
return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, RHS, LHS, Flags);
81198134
}
81208135

81218136
SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {

0 commit comments

Comments
 (0)