Skip to content

Commit 613222e

Browse files
authored
[DAGCombiner] Remove UnsafeFPMath usage in visitFSUBForFMACombine etc. (#145637)
Remove `UnsafeFPMath` in `visitFMULForFMADistributiveCombine`, `visitFSUBForFMACombine` and `visitFDIV`. All affected tests are fixed by adding fast-math flags manually. Propagate fast-math flags when lowering fdiv in the NVPTX backend, so that it can produce an optimized DAG when `unsafe-fp-math` is absent.
1 parent 2de5134 commit 613222e

File tree

12 files changed

+2764
-2104
lines changed

12 files changed

+2764
-2104
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16820,7 +16820,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
1682016820
static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
1682116821
assert(N.getOpcode() == ISD::FMUL);
1682216822

16823-
return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
16823+
return Options.AllowFPOpFusion == FPOpFusion::Fast ||
1682416824
N->getFlags().hasAllowContract();
1682516825
}
1682616826

@@ -17093,8 +17093,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
1709317093
return SDValue();
1709417094

1709517095
const SDNodeFlags Flags = N->getFlags();
17096-
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
17097-
Options.UnsafeFPMath || HasFMAD);
17096+
bool AllowFusionGlobally =
17097+
(Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD);
1709817098

1709917099
// If the subtraction is not contractable, do not combine.
1710017100
if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
@@ -17249,22 +17249,17 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
1724917249
}
1725017250
}
1725117251

17252-
auto isReassociable = [&Options](SDNode *N) {
17253-
return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
17254-
};
17255-
17256-
auto isContractableAndReassociableFMUL = [&isContractableFMUL,
17257-
&isReassociable](SDValue N) {
17258-
return isContractableFMUL(N) && isReassociable(N.getNode());
17252+
auto isContractableAndReassociableFMUL = [&isContractableFMUL](SDValue N) {
17253+
return isContractableFMUL(N) && N->getFlags().hasAllowReassociation();
1725917254
};
1726017255

1726117256
auto isFusedOp = [&](SDValue N) {
1726217257
return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
1726317258
};
1726417259

1726517260
// More folding opportunities when target permits.
17266-
if (Aggressive && isReassociable(N)) {
17267-
bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
17261+
if (Aggressive && N->getFlags().hasAllowReassociation()) {
17262+
bool CanFuse = N->getFlags().hasAllowContract();
1726817263
// fold (fsub (fma x, y, (fmul u, v)), z)
1726917264
// -> (fma x, y (fma u, v, (fneg z)))
1727017265
if (CanFuse && isFusedOp(N0) &&
@@ -17421,8 +17416,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
1742117416

1742217417
// Floating-point multiply-add with intermediate rounding. This can result
1742317418
// in a less precise result due to the changed rounding order.
17424-
bool HasFMAD = Options.UnsafeFPMath &&
17425-
(LegalOperations && TLI.isFMADLegal(DAG, N));
17419+
bool HasFMAD = LegalOperations && TLI.isFMADLegal(DAG, N);
1742617420

1742717421
// No valid opcode, do not combine.
1742817422
if (!HasFMAD && !HasFMA)
@@ -18321,8 +18315,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
1832118315
// Only do the transform if the reciprocal is a legal fp immediate that
1832218316
// isn't too nasty (eg NaN, denormal, ...).
1832318317
if (((st == APFloat::opOK && !Recip.isDenormal()) ||
18324-
(st == APFloat::opInexact &&
18325-
(Options.UnsafeFPMath || Flags.hasAllowReciprocal()))) &&
18318+
(st == APFloat::opInexact && Flags.hasAllowReciprocal())) &&
1832618319
(!LegalOperations ||
1832718320
// FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
1832818321
// backend)... we should handle this gracefully after Legalize.
@@ -18333,7 +18326,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
1833318326
DAG.getConstantFP(Recip, DL, VT));
1833418327
}
1833518328

18336-
if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
18329+
if (Flags.hasAllowReciprocal()) {
1833718330
// If this FDIV is part of a reciprocal square root, it may be folded
1833818331
// into a target-specific square root estimate instruction.
1833918332
if (N1.getOpcode() == ISD::FSQRT) {
@@ -18408,7 +18401,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
1840818401

1840918402
// Fold X/Sqrt(X) -> Sqrt(X)
1841018403
if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
18411-
(Options.UnsafeFPMath || Flags.hasAllowReassociation()))
18404+
Flags.hasAllowReassociation())
1841218405
if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
1841318406
return N1;
1841418407

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2786,15 +2786,16 @@ static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG,
27862786
SDValue X = Op->getOperand(0);
27872787
SDValue Y = Op->getOperand(1);
27882788
EVT Ty = Op.getValueType();
2789+
SDNodeFlags Flags = Op->getFlags();
27892790

2790-
SDValue Div = DAG.getNode(ISD::FDIV, DL, Ty, X, Y);
2791-
SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, Ty, Div);
2792-
SDValue Mul =
2793-
DAG.getNode(ISD::FMUL, DL, Ty, Trunc, Y, SDNodeFlags::AllowContract);
2794-
SDValue Sub =
2795-
DAG.getNode(ISD::FSUB, DL, Ty, X, Mul, SDNodeFlags::AllowContract);
2791+
SDValue Div = DAG.getNode(ISD::FDIV, DL, Ty, X, Y, Flags);
2792+
SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, Ty, Div, Flags);
2793+
SDValue Mul = DAG.getNode(ISD::FMUL, DL, Ty, Trunc, Y,
2794+
Flags | SDNodeFlags::AllowContract);
2795+
SDValue Sub = DAG.getNode(ISD::FSUB, DL, Ty, X, Mul,
2796+
Flags | SDNodeFlags::AllowContract);
27962797

2797-
if (AllowUnsafeFPMath || Op->getFlags().hasNoInfs())
2798+
if (AllowUnsafeFPMath || Flags.hasNoInfs())
27982799
return Sub;
27992800

28002801
// If Y is infinite, return X

0 commit comments

Comments
 (0)