13
13
14
14
#include " InstCombineInternal.h"
15
15
#include " llvm/ADT/APInt.h"
16
+ #include " llvm/ADT/SmallPtrSet.h"
16
17
#include " llvm/ADT/SmallVector.h"
17
18
#include " llvm/Analysis/InstructionSimplify.h"
18
19
#include " llvm/Analysis/ValueTracking.h"
@@ -666,6 +667,90 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
666
667
return nullptr ;
667
668
}
668
669
670
+ // Check legality for transforming
671
+ // x = 1.0/sqrt(a)
672
+ // r1 = x * x;
673
+ // r2 = a/sqrt(a);
674
+ //
675
+ // TO
676
+ //
677
+ // r1 = 1/a
678
+ // r2 = sqrt(a)
679
+ // x = r1 * r2
680
+ // This transform works only when 'a' is known positive.
681
+ static bool isFSqrtDivToFMulLegal (Instruction *X,
682
+ SmallPtrSetImpl<Instruction *> &R1,
683
+ SmallPtrSetImpl<Instruction *> &R2) {
684
+ BasicBlock *BBx = X->getParent ();
685
+ BasicBlock *BBr1 = (*R1.begin ())->getParent ();
686
+ BasicBlock *BBr2 = (*R2.begin ())->getParent ();
687
+
688
+ CallInst *FSqrt = cast<CallInst>(X->getOperand (1 ));
689
+ if (!FSqrt->hasAllowReassoc () || !FSqrt->hasNoNaNs () ||
690
+ !FSqrt->hasNoSignedZeros () || !FSqrt->hasNoInfs ())
691
+ return false ;
692
+
693
+ // We change x = 1/sqrt(a) to x = sqrt(a) * 1/a . This change isn't allowed
694
+ // by recip fp as it is strictly meant to transform ops of type a/b to
695
+ // a * 1/b. So, this can be considered as algebraic rewrite and reassoc flag
696
+ // has been used(rather abused)in the past for algebraic rewrites.
697
+ if (!X->hasAllowReassoc () || !X->hasAllowReciprocal () || !X->hasNoInfs ())
698
+ return false ;
699
+
700
+ // Check the constraints on X, R1 and R2 combined.
701
+ // fdiv instruction and one of the multiplications must reside in the same
702
+ // block. If not, the optimized code may execute more ops than before and
703
+ // this may hamper the performance.
704
+ if (BBx != BBr1 && BBx != BBr2)
705
+ return false ;
706
+
707
+ // Check the constraints on instructions in R1.
708
+ if (any_of (R1, [BBr1](Instruction *I) {
709
+ // When you have multiple instructions residing in R1 and R2
710
+ // respectively, it's difficult to generate combinations of (R1,R2) and
711
+ // then check if we have the required pattern. So, for now, just be
712
+ // conservative.
713
+ return (I->getParent () != BBr1 || !I->hasAllowReassoc ());
714
+ }))
715
+ return false ;
716
+
717
+ // Check the constraints on instructions in R2.
718
+ return all_of (R2, [BBr2](Instruction *I) {
719
+ // When you have multiple instructions residing in R1 and R2
720
+ // respectively, it's difficult to generate combination of (R1,R2) and
721
+ // then check if we have the required pattern. So, for now, just be
722
+ // conservative.
723
+ return (I->getParent () == BBr2 && I->hasAllowReassoc ());
724
+ });
725
+ }
726
+
727
+ // If we have the following pattern,
728
+ // X = 1.0/sqrt(a)
729
+ // R1 = X * X
730
+ // R2 = a/sqrt(a)
731
+ // then this method collects all the instructions that match R1 and R2.
732
+ static bool getFSqrtDivOptPattern (Instruction *Div,
733
+ SmallPtrSetImpl<Instruction *> &R1,
734
+ SmallPtrSetImpl<Instruction *> &R2) {
735
+ Value *A;
736
+ if (match (Div, m_FDiv (m_FPOne (), m_Sqrt (m_Value (A)))) ||
737
+ match (Div, m_FDiv (m_SpecificFP (-1.0 ), m_Sqrt (m_Value (A))))) {
738
+ for (User *U : Div->users ()) {
739
+ Instruction *I = cast<Instruction>(U);
740
+ if (match (I, m_FMul (m_Specific (Div), m_Specific (Div))))
741
+ R1.insert (I);
742
+ }
743
+
744
+ CallInst *CI = cast<CallInst>(Div->getOperand (1 ));
745
+ for (User *U : CI->users ()) {
746
+ Instruction *I = cast<Instruction>(U);
747
+ if (match (I, m_FDiv (m_Specific (A), m_Sqrt (m_Specific (A)))))
748
+ R2.insert (I);
749
+ }
750
+ }
751
+ return !R1.empty () && !R2.empty ();
752
+ }
753
+
669
754
Instruction *InstCombinerImpl::foldFMulReassoc (BinaryOperator &I) {
670
755
Value *Op0 = I.getOperand (0 );
671
756
Value *Op1 = I.getOperand (1 );
@@ -1864,6 +1949,64 @@ static Instruction *foldFDivSqrtDivisor(BinaryOperator &I,
1864
1949
return BinaryOperator::CreateFMulFMF (Op0, NewSqrt, &I);
1865
1950
}
1866
1951
1952
+ // Change
1953
+ // X = 1/sqrt(a)
1954
+ // R1 = X * X
1955
+ // R2 = a * X
1956
+ //
1957
+ // TO
1958
+ //
1959
+ // FDiv = 1/a
1960
+ // FSqrt = sqrt(a)
1961
+ // FMul = FDiv * FSqrt
1962
+ // Replace Uses Of R1 With FDiv
1963
+ // Replace Uses Of R2 With FSqrt
1964
+ // Replace Uses Of X With FMul
1965
+ static Value *convertFSqrtDivIntoFMul (CallInst *CI, Instruction *X,
1966
+ SmallPtrSetImpl<Instruction *> &R1,
1967
+ SmallPtrSetImpl<Instruction *> &R2,
1968
+ InstCombiner::BuilderTy &B) {
1969
+
1970
+ B.SetInsertPoint (X);
1971
+
1972
+ // Every instance of R1 may have different fpmath metadata and fpmath flags.
1973
+ // We try to preserve them by having seperate fdiv instruction per R1
1974
+ // instance.
1975
+ Value *SqrtOp = CI->getArgOperand (0 );
1976
+ Instruction *FDiv;
1977
+
1978
+ for (Instruction *I : R1) {
1979
+ FDiv = cast<Instruction>(
1980
+ B.CreateFDiv (ConstantFP::get ((*R1.begin ())->getType (), 1.0 ), SqrtOp));
1981
+ FDiv->copyMetadata (*I);
1982
+ FDiv->copyFastMathFlags (I);
1983
+ I->replaceAllUsesWith (FDiv);
1984
+ }
1985
+
1986
+ // Although, by value, FSqrt = CI , every instance of R2 may have different
1987
+ // fpmath metadata and fpmath flags. We try to preserve them by cloning the
1988
+ // call instruction per R2 instance.
1989
+ CallInst *FSqrt;
1990
+ for (Instruction *I : R2) {
1991
+ FSqrt = cast<CallInst>(CI->clone ());
1992
+ FSqrt->insertBefore (CI);
1993
+ FSqrt->copyFastMathFlags (I);
1994
+ FSqrt->copyMetadata (*I);
1995
+ I->replaceAllUsesWith (FSqrt);
1996
+ }
1997
+
1998
+ Instruction *FMul;
1999
+ // If X = -1/sqrt(a) initially,then FMul = -(FDiv * FSqrt)
2000
+ if (match (X, m_FDiv (m_SpecificFP (-1.0 ), m_Specific (CI)))) {
2001
+ Value *Mul = B.CreateFMul (FDiv, FSqrt);
2002
+ FMul = cast<Instruction>(B.CreateFNegFMF (Mul, X));
2003
+ } else
2004
+ FMul = cast<Instruction>(B.CreateFMulFMF (FDiv, FSqrt, X));
2005
+ FMul->copyMetadata (*X);
2006
+
2007
+ return FMul;
2008
+ }
2009
+
1867
2010
Instruction *InstCombinerImpl::visitFDiv (BinaryOperator &I) {
1868
2011
Module *M = I.getModule ();
1869
2012
@@ -1888,6 +2031,24 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
1888
2031
return R;
1889
2032
1890
2033
Value *Op0 = I.getOperand (0 ), *Op1 = I.getOperand (1 );
2034
+
2035
+ // Convert
2036
+ // x = 1.0/sqrt(a)
2037
+ // r1 = x * x;
2038
+ // r2 = a/sqrt(a);
2039
+ //
2040
+ // TO
2041
+ //
2042
+ // r1 = 1/a
2043
+ // r2 = sqrt(a)
2044
+ // x = r1 * r2
2045
+ SmallPtrSet<Instruction *, 2 > R1, R2;
2046
+ if (getFSqrtDivOptPattern (&I, R1, R2) && isFSqrtDivToFMulLegal (&I, R1, R2)) {
2047
+ CallInst *CI = cast<CallInst>(I.getOperand (1 ));
2048
+ if (Value *D = convertFSqrtDivIntoFMul (CI, &I, R1, R2, Builder))
2049
+ return replaceInstUsesWith (I, D);
2050
+ }
2051
+
1891
2052
if (isa<Constant>(Op0))
1892
2053
if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
1893
2054
if (Instruction *R = FoldOpIntoSelect (I, SI))
0 commit comments