llvm
diff --git a/‎llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
Lines changed: 143 additions & 0 deletions b/‎llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
Lines changed: 143 additions & 0 deletions
@@ -626,6 +626,100 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
   return nullptr;
 }
 
+static bool isFSqrtDivToFMulLegal(Instruction *X,
+                                  const SmallVectorImpl<Instruction *> &R1,
+                                  const SmallVectorImpl<Instruction *> &R2) {
+  BasicBlock *BBx = X->getParent();
+  BasicBlock *BBr1 = R1[0]->getParent();
+  BasicBlock *BBr2 = R2[0]->getParent();
+  // Check the constaints on instruction X.
+  auto XConstraintsSatisfied = [X]() {
+    // We change x = 1/sqrt(a) to x = sqrt(a) * 1/a . This change isn't allowed
+    // by recip fp as it is strictly meant to transform ops of type a/b to
+    // a * 1/b. So, this can be considered as algebraic rewrite and reassoc flag
+    // has been used(rather abused)in the past for algebraic rewrites.
+    return X->hasAllowReassoc();
+  };
+  if (!XConstraintsSatisfied())
+    return false;
+
+  // Check the constraints on instructions in R1.
+  auto R1ConstraintsSatisfied = [BBr1](Instruction *I) {
+    // When you have multiple instructions residing in R1 and R2 respectively,
+    // it's difficult to generate combinations of (R1,R2) and then check if we
+    // have the required pattern. So, for now, just be conservative.
+    if (I->getParent() != BBr1)
+      return false;
+    if (!I->hasNUsesOrMore(1))
+      return false;
+    // The optimization tries to convert
+    // R1 = div * div    where, div = 1/sqrt(a)
+    // to
+    // R1 = 1/a
+    // Now, this simplication does not work because sqrt(a)=NaN when a<0
+    if (!I->hasNoNaNs())
+      return false;
+    // sqrt(-0.0) = -0.0, and doing this simplication would change the sign of
+    // the result.
+    return I->hasNoSignedZeros() && I->hasAllowReassoc();
+  };
+  if (!all_of(R1, R1ConstraintsSatisfied))
+    return false;
+
+  // Check the constraints on instructions in R2.
+  auto R2ConstraintsSatisfied = [BBr2](Instruction *I) {
+    // When you have multiple instructions residing in R1 and R2 respectively,
+    // it's difficult to generate combination of (R1,R2) and then check if we
+    // have the required pattern. So, for now, just be conservative.
+    if (I->getParent() != BBr2)
+      return false;
+    if (!I->hasNUsesOrMore(1))
+      return false;
+    // This simplication changes
+    // R2 = a/sqrt(a)
+    // to
+    // R2 = sqrt(a)
+    // Now, sqrt(-0.0) = -0.0 and doing this simplication would produce -0.0
+    // instead of NaN.
+    return I->hasNoSignedZeros() && I->hasAllowReassoc();
+  };
+  if (!all_of(R2, R2ConstraintsSatisfied))
+    return false;
+
+  // Check the constraints on X, R1 and R2 combined.
+  // fdiv instruction and one of the multiplications must reside in the same
+  // block. If not, the optimized code may execute more ops than before and
+  // this may hamper the performance.
+  return (BBx == BBr1 || BBx == BBr2);
+}
+
+static void getFSqrtDivOptPattern(Instruction *Div,
+                                  SmallVectorImpl<Instruction *> &R1,
+                                  SmallVectorImpl<Instruction *> &R2) {
+  Value *A;
+  if (match(Div, m_FDiv(m_FPOne(), m_Sqrt(m_Value(A)))) ||
+      match(Div, m_FDiv(m_SpecificFP(-1.0), m_Sqrt(m_Value(A))))) {
+    for (User *U : Div->users()) {
+      Instruction *I = dyn_cast<Instruction>(U);
+      if (!(I && I->getOpcode() == Instruction::FMul))
+        continue;
+
+      if (match(I, m_FMul(m_Specific(Div), m_Specific(Div)))) {
+        R1.push_back(I);
+        continue;
+      }
+    }
+    CallInst *CI = cast<CallInst>(Div->getOperand(1));
+    for (User *U : CI->users()) {
+      Instruction *I = dyn_cast<Instruction>(U);
+      if (match(I, m_FDiv(m_Specific(A), m_Sqrt(m_Specific(A))))) {
+        R2.push_back(I);
+        continue;
+      }
+    }
+  }
+}
+
 Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0);
   Value *Op1 = I.getOperand(1);
@@ -1796,6 +1890,35 @@ static Instruction *foldFDivSqrtDivisor(BinaryOperator &I,
   return BinaryOperator::CreateFMulFMF(Op0, NewSqrt, &I);
 }
 
+static Value *convertFSqrtDivIntoFMul(CallInst *CI, Instruction *X,
+                                      const SmallVectorImpl<Instruction *> &R1,
+                                      const SmallVectorImpl<Instruction *> &R2,
+                                      Value *SqrtOp,
+                                      InstCombiner::BuilderTy &B) {
+  // 1. synthesize tmp1 = 1/a and replace uses of r1
+  B.SetInsertPoint(X);
+  Value *Tmp1 =
+      B.CreateFDivFMF(ConstantFP::get(R1[0]->getType(), 1.0), SqrtOp, R1[0]);
+  for (auto *I : R1)
+    I->replaceAllUsesWith(Tmp1);
+
+  // 2. No need of synthesizing Tmp2 again. In this scenario, tmp2 = CI. Replace
+  // uses of r2 with tmp2
+  for (auto *I : R2)
+    I->replaceAllUsesWith(CI);
+
+  // 3. synthesize tmp3  = tmp1 * tmp2 . Replace uses of 'x' with tmp3
+  Value *Tmp3;
+  // If x = -1/sqrt(a) initially,then Tmp3 = -(Tmp1*tmp2)
+  if (match(X, m_FDiv(m_SpecificFP(-1.0), m_Specific(CI)))) {
+    Value *Mul = B.CreateFMul(Tmp1, CI);
+    Tmp3 = B.CreateFNegFMF(Mul, X);
+  } else
+    Tmp3 = B.CreateFMulFMF(Tmp1, CI, X);
+
+  return Tmp3;
+}
+
 Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
   Module *M = I.getModule();
 
@@ -1820,6 +1943,26 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
     return R;
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  // Convert
+  // x = 1.0/sqrt(a)
+  // r1 = x * x;
+  // r2 = a/sqrt(a);
+  //
+  // TO
+  //
+  // r1 = 1/a
+  // r2 = sqrt(a)
+  // x = r1 * r2
+  SmallVector<Instruction *, 2> R1, R2;
+  getFSqrtDivOptPattern(&I, R1, R2);
+  if (!R1.empty() && !R2.empty() && isFSqrtDivToFMulLegal(&I, R1, R2)) {
+    CallInst *CI = cast<CallInst>(I.getOperand(1));
+    Value *SqrtOp = CI->getArgOperand(0);
+    if (Value *D = convertFSqrtDivIntoFMul(CI, &I, R1, R2, SqrtOp, Builder))
+      return replaceInstUsesWith(I, D);
+  }
+
   if (isa<Constant>(Op0))
     if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
       if (Instruction *R = FoldOpIntoSelect(I, SI))