-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[SelectionDAG] Remove UnsafeFPMath check in visitFADDForFMACombine
#127770
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16619,8 +16619,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { | |
if (!HasFMAD && !HasFMA) | ||
return SDValue(); | ||
|
||
bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || | ||
Options.UnsafeFPMath || HasFMAD); | ||
bool AllowFusionGlobally = | ||
Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD; | ||
// If the addition is not contractable, do not combine. | ||
if (!AllowFusionGlobally && !N->getFlags().hasAllowContract()) | ||
return SDValue(); | ||
|
@@ -17826,6 +17826,7 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) { | |
SDValue N2 = N->getOperand(2); | ||
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); | ||
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); | ||
ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); | ||
EVT VT = N->getValueType(0); | ||
SDLoc DL(N); | ||
const TargetOptions &Options = DAG.getTarget().Options; | ||
|
@@ -17855,11 +17856,17 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) { | |
} | ||
|
||
// FIXME: use fast math flags instead of Options.UnsafeFPMath | ||
if (Options.UnsafeFPMath) { | ||
if (N0CFP && N0CFP->isZero()) | ||
return N2; | ||
if (N1CFP && N1CFP->isZero()) | ||
return N2; | ||
// TODO: Finally migrate away from global TargetOptions. | ||
if (Options.AllowFPOpFusion == FPOpFusion::Fast || | ||
(Options.NoNaNsFPMath && Options.NoInfsFPMath) || | ||
(N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs())) { | ||
if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros() || | ||
(N2CFP && !N2CFP->isExactlyValue(-0.0))) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we have tests for this negative zero constant case? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add a simple test for X86. |
||
if (N0CFP && N0CFP->isZero()) | ||
return N2; | ||
if (N1CFP && N1CFP->isZero()) | ||
return N2; | ||
} | ||
} | ||
|
||
// FIXME: Support splat of constant. | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -1,18 +1,16 @@ | ||||||||||||||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||||||||||||||||||
; RUN: llc -mtriple=arm64 -fp-contract=fast -o - %s | FileCheck %s | ||||||||||||||||||
; RUN: llc -mtriple=arm64 -o - %s | FileCheck %s | ||||||||||||||||||
|
||||||||||||||||||
|
||||||||||||||||||
; Make sure we don't try to fold an fneg into +0.0, creating an illegal constant | ||||||||||||||||||
; -0.0. It's also good, though not essential, that we don't resort to a litpool. | ||||||||||||||||||
define double @test_fms_fold(double %a, double %b) { | ||||||||||||||||||
; CHECK-LABEL: test_fms_fold: | ||||||||||||||||||
; CHECK: // %bb.0: | ||||||||||||||||||
; CHECK-NEXT: movi d2, #0000000000000000 | ||||||||||||||||||
; CHECK-NEXT: fmul d1, d1, d2 | ||||||||||||||||||
; CHECK-NEXT: fnmsub d0, d0, d2, d1 | ||||||||||||||||||
; CHECK-NEXT: movi {{d[0-9]+}}, #0000000000000000 | ||||||||||||||||||
; CHECK-NEXT: ret | ||||||||||||||||||
%mul = fmul double %a, 0.000000e+00 | ||||||||||||||||||
%mul1 = fmul double %b, 0.000000e+00 | ||||||||||||||||||
%mul = fmul fast double %a, 0.000000e+00 | ||||||||||||||||||
%mul1 = fmul fast double %b, 0.000000e+00 | ||||||||||||||||||
Comment on lines
+12
to
+13
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this test case should keep fast here, because the initial version of this test is
which ensures constant folding does not generate -0.0 on ARM. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 2 tests? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This test was introduced in 820e041 and was regenerated in d5f1131, which seems like another regression. See https://reviews.llvm.org/D99586 |
||||||||||||||||||
%sub = fsub double %mul, %mul1 | ||||||||||||||||||
ret double %sub | ||||||||||||||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe the "Options.AllowFPOpFusion == FPOpFusion::Fast" part of the condition is incorrect: -fp-contract=fast means you can translate mul/add to fma globally. It does not mean you can ignore NaNs. Furthermore, no tests fail if I remove this part of the OR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Created PR #146592 to fix.