@@ -626,6 +626,100 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
626
626
return nullptr ;
627
627
}
628
628
629
+ static bool isFSqrtDivToFMulLegal (Instruction *X,
630
+ const SmallVectorImpl<Instruction *> &R1,
631
+ const SmallVectorImpl<Instruction *> &R2) {
632
+ BasicBlock *BBx = X->getParent ();
633
+ BasicBlock *BBr1 = R1[0 ]->getParent ();
634
+ BasicBlock *BBr2 = R2[0 ]->getParent ();
635
+ // Check the constaints on instruction X.
636
+ auto XConstraintsSatisfied = [X]() {
637
+ // We change x = 1/sqrt(a) to x = sqrt(a) * 1/a . This change isn't allowed
638
+ // by recip fp as it is strictly meant to transform ops of type a/b to
639
+ // a * 1/b. So, this can be considered as algebraic rewrite and reassoc flag
640
+ // has been used(rather abused)in the past for algebraic rewrites.
641
+ return X->hasAllowReassoc ();
642
+ };
643
+ if (!XConstraintsSatisfied ())
644
+ return false ;
645
+
646
+ // Check the constraints on instructions in R1.
647
+ auto R1ConstraintsSatisfied = [BBr1](Instruction *I) {
648
+ // When you have multiple instructions residing in R1 and R2 respectively,
649
+ // it's difficult to generate combinations of (R1,R2) and then check if we
650
+ // have the required pattern. So, for now, just be conservative.
651
+ if (I->getParent () != BBr1)
652
+ return false ;
653
+ if (!I->hasNUsesOrMore (1 ))
654
+ return false ;
655
+ // The optimization tries to convert
656
+ // R1 = div * div where, div = 1/sqrt(a)
657
+ // to
658
+ // R1 = 1/a
659
+ // Now, this simplication does not work because sqrt(a)=NaN when a<0
660
+ if (!I->hasNoNaNs ())
661
+ return false ;
662
+ // sqrt(-0.0) = -0.0, and doing this simplication would change the sign of
663
+ // the result.
664
+ return I->hasNoSignedZeros () && I->hasAllowReassoc ();
665
+ };
666
+ if (!all_of (R1, R1ConstraintsSatisfied))
667
+ return false ;
668
+
669
+ // Check the constraints on instructions in R2.
670
+ auto R2ConstraintsSatisfied = [BBr2](Instruction *I) {
671
+ // When you have multiple instructions residing in R1 and R2 respectively,
672
+ // it's difficult to generate combination of (R1,R2) and then check if we
673
+ // have the required pattern. So, for now, just be conservative.
674
+ if (I->getParent () != BBr2)
675
+ return false ;
676
+ if (!I->hasNUsesOrMore (1 ))
677
+ return false ;
678
+ // This simplication changes
679
+ // R2 = a/sqrt(a)
680
+ // to
681
+ // R2 = sqrt(a)
682
+ // Now, sqrt(-0.0) = -0.0 and doing this simplication would produce -0.0
683
+ // instead of NaN.
684
+ return I->hasNoSignedZeros () && I->hasAllowReassoc ();
685
+ };
686
+ if (!all_of (R2, R2ConstraintsSatisfied))
687
+ return false ;
688
+
689
+ // Check the constraints on X, R1 and R2 combined.
690
+ // fdiv instruction and one of the multiplications must reside in the same
691
+ // block. If not, the optimized code may execute more ops than before and
692
+ // this may hamper the performance.
693
+ return (BBx == BBr1 || BBx == BBr2);
694
+ }
695
+
696
+ static void getFSqrtDivOptPattern (Instruction *Div,
697
+ SmallVectorImpl<Instruction *> &R1,
698
+ SmallVectorImpl<Instruction *> &R2) {
699
+ Value *A;
700
+ if (match (Div, m_FDiv (m_FPOne (), m_Sqrt (m_Value (A)))) ||
701
+ match (Div, m_FDiv (m_SpecificFP (-1.0 ), m_Sqrt (m_Value (A))))) {
702
+ for (User *U : Div->users ()) {
703
+ Instruction *I = dyn_cast<Instruction>(U);
704
+ if (!(I && I->getOpcode () == Instruction::FMul))
705
+ continue ;
706
+
707
+ if (match (I, m_FMul (m_Specific (Div), m_Specific (Div)))) {
708
+ R1.push_back (I);
709
+ continue ;
710
+ }
711
+ }
712
+ CallInst *CI = cast<CallInst>(Div->getOperand (1 ));
713
+ for (User *U : CI->users ()) {
714
+ Instruction *I = dyn_cast<Instruction>(U);
715
+ if (match (I, m_FDiv (m_Specific (A), m_Sqrt (m_Specific (A))))) {
716
+ R2.push_back (I);
717
+ continue ;
718
+ }
719
+ }
720
+ }
721
+ }
722
+
629
723
Instruction *InstCombinerImpl::foldFMulReassoc (BinaryOperator &I) {
630
724
Value *Op0 = I.getOperand (0 );
631
725
Value *Op1 = I.getOperand (1 );
@@ -1796,6 +1890,35 @@ static Instruction *foldFDivSqrtDivisor(BinaryOperator &I,
1796
1890
return BinaryOperator::CreateFMulFMF (Op0, NewSqrt, &I);
1797
1891
}
1798
1892
1893
+ static Value *convertFSqrtDivIntoFMul (CallInst *CI, Instruction *X,
1894
+ const SmallVectorImpl<Instruction *> &R1,
1895
+ const SmallVectorImpl<Instruction *> &R2,
1896
+ Value *SqrtOp,
1897
+ InstCombiner::BuilderTy &B) {
1898
+ // 1. synthesize tmp1 = 1/a and replace uses of r1
1899
+ B.SetInsertPoint (X);
1900
+ Value *Tmp1 =
1901
+ B.CreateFDivFMF (ConstantFP::get (R1[0 ]->getType (), 1.0 ), SqrtOp, R1[0 ]);
1902
+ for (auto *I : R1)
1903
+ I->replaceAllUsesWith (Tmp1);
1904
+
1905
+ // 2. No need of synthesizing Tmp2 again. In this scenario, tmp2 = CI. Replace
1906
+ // uses of r2 with tmp2
1907
+ for (auto *I : R2)
1908
+ I->replaceAllUsesWith (CI);
1909
+
1910
+ // 3. synthesize tmp3 = tmp1 * tmp2 . Replace uses of 'x' with tmp3
1911
+ Value *Tmp3;
1912
+ // If x = -1/sqrt(a) initially,then Tmp3 = -(Tmp1*tmp2)
1913
+ if (match (X, m_FDiv (m_SpecificFP (-1.0 ), m_Specific (CI)))) {
1914
+ Value *Mul = B.CreateFMul (Tmp1, CI);
1915
+ Tmp3 = B.CreateFNegFMF (Mul, X);
1916
+ } else
1917
+ Tmp3 = B.CreateFMulFMF (Tmp1, CI, X);
1918
+
1919
+ return Tmp3;
1920
+ }
1921
+
1799
1922
Instruction *InstCombinerImpl::visitFDiv (BinaryOperator &I) {
1800
1923
Module *M = I.getModule ();
1801
1924
@@ -1820,6 +1943,26 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
1820
1943
return R;
1821
1944
1822
1945
Value *Op0 = I.getOperand (0 ), *Op1 = I.getOperand (1 );
1946
+
1947
+ // Convert
1948
+ // x = 1.0/sqrt(a)
1949
+ // r1 = x * x;
1950
+ // r2 = a/sqrt(a);
1951
+ //
1952
+ // TO
1953
+ //
1954
+ // r1 = 1/a
1955
+ // r2 = sqrt(a)
1956
+ // x = r1 * r2
1957
+ SmallVector<Instruction *, 2 > R1, R2;
1958
+ getFSqrtDivOptPattern (&I, R1, R2);
1959
+ if (!R1.empty () && !R2.empty () && isFSqrtDivToFMulLegal (&I, R1, R2)) {
1960
+ CallInst *CI = cast<CallInst>(I.getOperand (1 ));
1961
+ Value *SqrtOp = CI->getArgOperand (0 );
1962
+ if (Value *D = convertFSqrtDivIntoFMul (CI, &I, R1, R2, SqrtOp, Builder))
1963
+ return replaceInstUsesWith (I, D);
1964
+ }
1965
+
1823
1966
if (isa<Constant>(Op0))
1824
1967
if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
1825
1968
if (Instruction *R = FoldOpIntoSelect (I, SI))
0 commit comments