Skip to content

Commit 6012fed

Browse files
committed
AMDGPU: Fix sqrt fast math flags spreading to fdiv fast math flags
This was working around the lack of operator| on FastMathFlags. We have that now, which revealed the bug.
1 parent 2263dfe commit 6012fed

File tree

2 files changed

+319
-302
lines changed

2 files changed

+319
-302
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -891,8 +891,8 @@ bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,
891891
}
892892

893893
Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
894-
IRBuilder<> &Builder, Value *Num, Value *Den, FastMathFlags DivFMF,
895-
FastMathFlags SqrtFMF, const Instruction *CtxI) const {
894+
IRBuilder<> &Builder, Value *Num, Value *Den, const FastMathFlags DivFMF,
895+
const FastMathFlags SqrtFMF, const Instruction *CtxI) const {
896896
// The rsqrt contraction increases accuracy from ~2ulp to ~1ulp.
897897
assert(DivFMF.allowContract() && SqrtFMF.allowContract());
898898

@@ -911,8 +911,7 @@ Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
911911
if (CLHS->isExactlyValue(1.0) || (IsNegative = CLHS->isExactlyValue(-1.0))) {
912912
// Add in the sqrt flags.
913913
IRBuilder<>::FastMathFlagGuard Guard(Builder);
914-
DivFMF |= SqrtFMF;
915-
Builder.setFastMathFlags(DivFMF);
914+
Builder.setFastMathFlags(DivFMF | SqrtFMF);
916915

917916
if ((DivFMF.approxFunc() && SqrtFMF.approxFunc()) || HasUnsafeFPMath ||
918917
canIgnoreDenormalInput(Den, CtxI)) {

0 commit comments

Comments
 (0)