@@ -7907,9 +7907,10 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
7907
7907
}
7908
7908
7909
7909
static SDValue getFPBinOp (SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
7910
- EVT VT, SDValue A, SDValue B, SDValue GlueChain) {
7910
+ EVT VT, SDValue A, SDValue B, SDValue GlueChain,
7911
+ SDNodeFlags Flags) {
7911
7912
if (GlueChain->getNumValues () <= 1 ) {
7912
- return DAG.getNode (Opcode, SL, VT, A, B);
7913
+ return DAG.getNode (Opcode, SL, VT, A, B, Flags );
7913
7914
}
7914
7915
7915
7916
assert (GlueChain->getNumValues () == 3 );
@@ -7922,15 +7923,16 @@ static SDValue getFPBinOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
7922
7923
break ;
7923
7924
}
7924
7925
7925
- return DAG.getNode (Opcode, SL, VTList, GlueChain.getValue (1 ), A, B,
7926
- GlueChain.getValue (2 ));
7926
+ return DAG.getNode (Opcode, SL, VTList,
7927
+ {GlueChain.getValue (1 ), A, B, GlueChain.getValue (2 )},
7928
+ Flags);
7927
7929
}
7928
7930
7929
7931
static SDValue getFPTernOp (SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
7930
7932
EVT VT, SDValue A, SDValue B, SDValue C,
7931
- SDValue GlueChain) {
7933
+ SDValue GlueChain, SDNodeFlags Flags ) {
7932
7934
if (GlueChain->getNumValues () <= 1 ) {
7933
- return DAG.getNode (Opcode, SL, VT, A, B, C);
7935
+ return DAG.getNode (Opcode, SL, VT, { A, B, C}, Flags );
7934
7936
}
7935
7937
7936
7938
assert (GlueChain->getNumValues () == 3 );
@@ -7943,8 +7945,9 @@ static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
7943
7945
break ;
7944
7946
}
7945
7947
7946
- return DAG.getNode (Opcode, SL, VTList, GlueChain.getValue (1 ), A, B, C,
7947
- GlueChain.getValue (2 ));
7948
+ return DAG.getNode (Opcode, SL, VTList,
7949
+ {GlueChain.getValue (1 ), A, B, C, GlueChain.getValue (2 )},
7950
+ Flags);
7948
7951
}
7949
7952
7950
7953
SDValue SITargetLowering::LowerFDIV16 (SDValue Op, SelectionDAG &DAG) const {
@@ -8018,6 +8021,13 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
8018
8021
if (SDValue FastLowered = lowerFastUnsafeFDIV (Op, DAG))
8019
8022
return FastLowered;
8020
8023
8024
+ // The selection matcher assumes anything with a chain selects to a
8025
+ // mayRaiseFPException machine instruction. Since we're introducing a chain
8026
+ // here, we need to explicitly report nofpexcept for the regular fdiv
8027
+ // lowering.
8028
+ SDNodeFlags Flags = Op->getFlags ();
8029
+ Flags.setNoFPExcept (true );
8030
+
8021
8031
SDLoc SL (Op);
8022
8032
SDValue LHS = Op.getOperand (0 );
8023
8033
SDValue RHS = Op.getOperand (1 );
@@ -8027,15 +8037,15 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
8027
8037
SDVTList ScaleVT = DAG.getVTList (MVT::f32 , MVT::i1);
8028
8038
8029
8039
SDValue DenominatorScaled = DAG.getNode (AMDGPUISD::DIV_SCALE, SL, ScaleVT,
8030
- RHS, RHS, LHS);
8040
+ { RHS, RHS, LHS}, Flags );
8031
8041
SDValue NumeratorScaled = DAG.getNode (AMDGPUISD::DIV_SCALE, SL, ScaleVT,
8032
- LHS, RHS, LHS);
8042
+ { LHS, RHS, LHS}, Flags );
8033
8043
8034
8044
// Denominator is scaled to not be denormal, so using rcp is ok.
8035
8045
SDValue ApproxRcp = DAG.getNode (AMDGPUISD::RCP, SL, MVT::f32 ,
8036
- DenominatorScaled);
8046
+ DenominatorScaled, Flags );
8037
8047
SDValue NegDivScale0 = DAG.getNode (ISD::FNEG, SL, MVT::f32 ,
8038
- DenominatorScaled);
8048
+ DenominatorScaled, Flags );
8039
8049
8040
8050
const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE |
8041
8051
(4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
@@ -8045,6 +8055,10 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
8045
8055
const bool HasFP32Denormals = hasFP32Denormals (DAG.getMachineFunction ());
8046
8056
8047
8057
if (!HasFP32Denormals) {
8058
+ // Note we can't use the STRICT_FMA/STRICT_FMUL for the non-strict FDIV
8059
+ // lowering. The chain dependence is insufficient, and we need glue. We do
8060
+ // not need the glue variants in a strictfp function.
8061
+
8048
8062
SDVTList BindParamVTs = DAG.getVTList (MVT::Other, MVT::Glue);
8049
8063
8050
8064
SDNode *EnableDenorm;
@@ -8072,21 +8086,22 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
8072
8086
}
8073
8087
8074
8088
SDValue Fma0 = getFPTernOp (DAG, ISD::FMA, SL, MVT::f32 , NegDivScale0,
8075
- ApproxRcp, One, NegDivScale0);
8089
+ ApproxRcp, One, NegDivScale0, Flags );
8076
8090
8077
8091
SDValue Fma1 = getFPTernOp (DAG, ISD::FMA, SL, MVT::f32 , Fma0, ApproxRcp,
8078
- ApproxRcp, Fma0);
8092
+ ApproxRcp, Fma0, Flags );
8079
8093
8080
8094
SDValue Mul = getFPBinOp (DAG, ISD::FMUL, SL, MVT::f32 , NumeratorScaled,
8081
- Fma1, Fma1);
8095
+ Fma1, Fma1, Flags );
8082
8096
8083
8097
SDValue Fma2 = getFPTernOp (DAG, ISD::FMA, SL, MVT::f32 , NegDivScale0, Mul,
8084
- NumeratorScaled, Mul);
8098
+ NumeratorScaled, Mul, Flags );
8085
8099
8086
- SDValue Fma3 = getFPTernOp (DAG, ISD::FMA, SL, MVT::f32 , Fma2, Fma1, Mul, Fma2);
8100
+ SDValue Fma3 = getFPTernOp (DAG, ISD::FMA, SL, MVT::f32 ,
8101
+ Fma2, Fma1, Mul, Fma2, Flags);
8087
8102
8088
8103
SDValue Fma4 = getFPTernOp (DAG, ISD::FMA, SL, MVT::f32 , NegDivScale0, Fma3,
8089
- NumeratorScaled, Fma3);
8104
+ NumeratorScaled, Fma3, Flags );
8090
8105
8091
8106
if (!HasFP32Denormals) {
8092
8107
SDNode *DisableDenorm;
@@ -8113,9 +8128,9 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
8113
8128
8114
8129
SDValue Scale = NumeratorScaled.getValue (1 );
8115
8130
SDValue Fmas = DAG.getNode (AMDGPUISD::DIV_FMAS, SL, MVT::f32 ,
8116
- Fma4, Fma1, Fma3, Scale);
8131
+ { Fma4, Fma1, Fma3, Scale}, Flags );
8117
8132
8118
- return DAG.getNode (AMDGPUISD::DIV_FIXUP, SL, MVT::f32 , Fmas, RHS, LHS);
8133
+ return DAG.getNode (AMDGPUISD::DIV_FIXUP, SL, MVT::f32 , Fmas, RHS, LHS, Flags );
8119
8134
}
8120
8135
8121
8136
SDValue SITargetLowering::LowerFDIV64 (SDValue Op, SelectionDAG &DAG) const {
0 commit comments