-
Notifications
You must be signed in to change notification settings - Fork 14.2k
[InstCombine] Optimise x / sqrt(y / z) with fast-math pattern. #76737
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Zain Jaffal (zjaffal) ChangesReplace the pattern with Full diff: https://github.com/llvm/llvm-project/pull/76737.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index f0ea3d9fcad5df..172ce18d003aa8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1701,6 +1701,31 @@ static Instruction *foldFDivPowDivisor(BinaryOperator &I,
return BinaryOperator::CreateFMulFMF(Op0, Pow, &I);
}
+/// Convert div to mul if we have an sqrt divisor iff sqrt's operand is a fdiv
+/// instruction.
+static Instruction *foldFDivSqrtDivisor(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ // X / sqrt(Y / Z) --> X * sqrt(Z / Y)
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ auto *II = dyn_cast<IntrinsicInst>(Op1);
+ if (!II || II->getIntrinsicID() != Intrinsic::sqrt || !II->hasOneUse() ||
+ !I.hasAllowReassoc() || !I.hasAllowReciprocal())
+ return nullptr;
+
+ Value *Y, *Z;
+ auto *DivOp = dyn_cast<Instruction>(II->getOperand(0));
+ if (!DivOp || !DivOp->hasOneUse() || !DivOp->hasAllowReassoc() ||
+ !I.hasAllowReciprocal())
+ return nullptr;
+ if (match(DivOp, m_FDiv(m_Value(Y), m_Value(Z)))) {
+ Value *SwapDiv = Builder.CreateFDivFMF(Z, Y, DivOp);
+ Value *NewSqrt = Builder.CreateIntrinsic(II->getIntrinsicID(),
+ II->getType(), {SwapDiv}, II);
+ return BinaryOperator::CreateFMulFMF(Op0, NewSqrt, &I);
+ }
+ return nullptr;
+}
+
Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
Module *M = I.getModule();
@@ -1808,6 +1833,9 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
if (Instruction *Mul = foldFDivPowDivisor(I, Builder))
return Mul;
+ if (Instruction *Mul = foldFDivSqrtDivisor(I, Builder))
+ return Mul;
+
// pow(X, Y) / X --> pow(X, Y-1)
if (I.hasAllowReassoc() &&
match(Op0, m_OneUse(m_Intrinsic<Intrinsic::pow>(m_Specific(Op1),
diff --git a/llvm/test/Transforms/InstCombine/fdiv-sqrt.ll b/llvm/test/Transforms/InstCombine/fdiv-sqrt.ll
new file mode 100644
index 00000000000000..3f41b0f24ae040
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fdiv-sqrt.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+declare double @llvm.sqrt.f64(double)
+
+define double @sqrt_div_fast(double %x, double %y, double %z) {
+; CHECK-LABEL: @sqrt_div_fast(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fdiv fast double [[Z:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.sqrt.f64(double [[TMP0]])
+; CHECK-NEXT: [[DIV1:%.*]] = fmul fast double [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret double [[DIV1]]
+;
+entry:
+ %div = fdiv fast double %y, %z
+ %sqrt = call fast double @llvm.sqrt.f64(double %div)
+ %div1 = fdiv fast double %x, %sqrt
+ ret double %div1
+}
+
+define double @sqrt_div(double %x, double %y, double %z) {
+; CHECK-LABEL: @sqrt_div(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[DIV]])
+; CHECK-NEXT: [[DIV1:%.*]] = fdiv double [[X:%.*]], [[SQRT]]
+; CHECK-NEXT: ret double [[DIV1]]
+;
+entry:
+ %div = fdiv double %y, %z
+ %sqrt = call double @llvm.sqrt.f64(double %div)
+ %div1 = fdiv double %x, %sqrt
+ ret double %div1
+}
+
+define double @sqrt_div_reassoc_arcp(double %x, double %y, double %z) {
+; CHECK-LABEL: @sqrt_div_reassoc_arcp(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc arcp double [[Z:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call reassoc arcp double @llvm.sqrt.f64(double [[TMP0]])
+; CHECK-NEXT: [[DIV1:%.*]] = fmul reassoc arcp double [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret double [[DIV1]]
+;
+entry:
+ %div = fdiv reassoc arcp double %y, %z
+ %sqrt = call reassoc arcp double @llvm.sqrt.f64(double %div)
+ %div1 = fdiv reassoc arcp double %x, %sqrt
+ ret double %div1
+}
+
+declare void @use(double)
+define double @sqrt_div_fast_multiple_uses_1(double %x, double %y, double %z) {
+; CHECK-LABEL: @sqrt_div_fast_multiple_uses_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[DIV:%.*]] = fdiv fast double [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: call void @use(double [[DIV]])
+; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[DIV]])
+; CHECK-NEXT: [[DIV1:%.*]] = fdiv fast double [[X:%.*]], [[SQRT]]
+; CHECK-NEXT: ret double [[DIV1]]
+;
+entry:
+ %div = fdiv fast double %y, %z
+ call void @use(double %div)
+ %sqrt = call fast double @llvm.sqrt.f64(double %div)
+ %div1 = fdiv fast double %x, %sqrt
+ ret double %div1
+}
+
+define double @sqrt_div_fast_multiple_uses_2(double %x, double %y, double %z) {
+; CHECK-LABEL: @sqrt_div_fast_multiple_uses_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[DIV:%.*]] = fdiv fast double [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[DIV]])
+; CHECK-NEXT: call void @use(double [[SQRT]])
+; CHECK-NEXT: [[DIV1:%.*]] = fdiv fast double [[X:%.*]], [[SQRT]]
+; CHECK-NEXT: ret double [[DIV1]]
+;
+entry:
+ %div = fdiv fast double %y, %z
+ %sqrt = call fast double @llvm.sqrt.f64(double %div)
+ call void @use(double %sqrt)
+ %div1 = fdiv fast double %x, %sqrt
+ ret double %div1
+}
+
|
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); | ||
auto *II = dyn_cast<IntrinsicInst>(Op1); | ||
if (!II || II->getIntrinsicID() != Intrinsic::sqrt || !II->hasOneUse() || | ||
!I.hasAllowReassoc() || !I.hasAllowReciprocal()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need reassoc
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
my assumption was following the implementation of foldFDivPowDivisor
or at least the div inside the sqrt should have that flag enabled?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using x.recip()
as shorthand for (1.0 / x)
, arcp
allows us to convert between a / b
and a * b.recip()
freely, and arguably (a / b).recip()
to (b / a)
(at the very least, gcc will do this with just -freciprocal-math
).
We can convert x / sqrt(y / z)
to x * sqrt(y / z).recip()
with just arcp
, and we can also convert x * sqrt((y / z).recip())
to x * sqrt(z / y)
with just arcp
as well, but the question is which flags are necessary to convert sqrt(a).recip()
to sqrt(a.recip())
. I think it stretches arcp
too far to allow this kind of permutation of recip
. reassoc
isn't entirely the right flag either; what we probably want is a more generic "allow algebraic identity" flag, but we're already using reassoc
for that purpose elsewhere, so we might as well use it here as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
okay so from my understanding then the pattern we are looking for is the following
%div = fdiv arcp double %y, %z
%sqrt = call reassoc double @llvm.sqrt.f64(double %div)
%div2 = fdiv arcp double %x, %sqrt
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not quite, you'd need the reassoc
on the first fdiv
as well, and at that point, it's probably best to just require that all the operations have reassoc
and arcp
.
// X / sqrt(Y / Z) --> X * sqrt(Z / Y) | ||
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); | ||
auto *II = dyn_cast<IntrinsicInst>(Op1); | ||
if (!II || II->getIntrinsicID() != Intrinsic::sqrt || !II->hasOneUse() || |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would be cleaner to use PatternMatch. Something like
if (match(I, m_Sqrt(m_OneUse(m_FDiv(m_Value(Op0), m_Value(Op1))) {
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree but then we won't be able to check if sqrt
has one use or fdiv
has the necessary flags
%div = fdiv fast double %y, %z | ||
call void @use(double %div) | ||
%sqrt = call fast double @llvm.sqrt.f64(double %div) | ||
%div1 = fdiv fast double %x, %sqrt |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should use reduced set of flags, and have some cases where flags are missing from individual instructions
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should use reduced set of flags
Is it satisfactory to assume only reassoc arcp
are enabled?
have some cases where flags are missing from individual instructions
Will do thanks for the suggestion
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I think so
ping |
1 similar comment
ping |
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
; RUN: opt -S -passes=instcombine < %s | FileCheck %s | ||
|
||
declare double @llvm.sqrt.f64(double) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pre-submit baseline tests?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In a separate PR?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, or directly push
✅ With the latest revision this PR passed the C/C++ code formatter. |
@arsenm thanks for your help i will push the test commits directly to main and then rebase this branch and merge |
Replace the pattern with x * sqrt(z/y)
Co-authored-by: Zain Jaffal <[email protected]>
9171f25
to
ab33697
Compare
ab33697
to
fca21cf
Compare
@arsenm i forced push the branch after cherry picking the tests to main now every check passes |
#76737)" This reverts commit bb5c389. That commit caused failed asserts like this: $ cat repro.c float a, b; double sqrt(); void c() { b = a / sqrt(a); } $ clang -target x86_64-linux-gnu -c -O2 -ffast-math repro.c clang: ../lib/IR/Instruction.cpp:522: bool llvm::Instruction::hasAllowReassoc() const: Assertion `isa<FPMathOperator>(this) && "getting fast-math flag on invalid op"' failed.
Replace the pattern with
x * sqrt(z/y)