@@ -1979,14 +1979,36 @@ static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
1979
1979
static bool isRemOfLoopIncrementWithLoopInvariant (Instruction *Rem,
1980
1980
const LoopInfo *LI,
1981
1981
Value *&RemAmtOut,
1982
+ Value *&AddInstOut,
1983
+ Value *&AddOffsetOut,
1982
1984
PHINode *&LoopIncrPNOut) {
1983
1985
Value *Incr, *RemAmt;
1984
1986
// NB: If RemAmt is a power of 2 it *should* have been transformed by now.
1985
1987
if (!match (Rem, m_URem (m_Value (Incr), m_Value (RemAmt))))
1986
1988
return false ;
1987
1989
1990
+ Value *AddInst, *AddOffset;
1988
1991
// Find out loop increment PHI.
1989
1992
auto *PN = dyn_cast<PHINode>(Incr);
1993
+ if (PN != nullptr ) {
1994
+ AddInst = nullptr ;
1995
+ AddOffset = nullptr ;
1996
+ } else {
1997
+ // Search through a NUW add on top of the loop increment.
1998
+ Value *V0, *V1;
1999
+ if (!match (Incr, m_NUWAdd (m_Value (V0), m_Value (V1))))
2000
+ return false ;
2001
+
2002
+ AddInst = Incr;
2003
+ PN = dyn_cast<PHINode>(V0);
2004
+ if (PN != nullptr ) {
2005
+ AddOffset = V1;
2006
+ } else {
2007
+ PN = dyn_cast<PHINode>(V1);
2008
+ AddOffset = V0;
2009
+ }
2010
+ }
2011
+
1990
2012
if (!PN)
1991
2013
return false ;
1992
2014
@@ -2026,6 +2048,8 @@ static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem,
2026
2048
// Set output variables.
2027
2049
RemAmtOut = RemAmt;
2028
2050
LoopIncrPNOut = PN;
2051
+ AddInstOut = AddInst;
2052
+ AddOffsetOut = AddOffset;
2029
2053
2030
2054
return true ;
2031
2055
}
@@ -2040,15 +2064,14 @@ static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem,
2040
2064
// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2041
2065
// for(i = Start; i < End; ++i, ++rem)
2042
2066
// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
2043
- //
2044
- // Currently only implemented for `IncrLoopInvariant` being zero.
2045
2067
static bool foldURemOfLoopIncrement (Instruction *Rem, const DataLayout *DL,
2046
2068
const LoopInfo *LI,
2047
2069
SmallSet<BasicBlock *, 32 > &FreshBBs,
2048
2070
bool IsHuge) {
2049
- Value *RemAmt;
2071
+ Value *AddOffset, * RemAmt, *AddInst ;
2050
2072
PHINode *LoopIncrPN;
2051
- if (!isRemOfLoopIncrementWithLoopInvariant (Rem, LI, RemAmt, LoopIncrPN))
2073
+ if (!isRemOfLoopIncrementWithLoopInvariant (Rem, LI, RemAmt, AddInst,
2074
+ AddOffset, LoopIncrPN))
2052
2075
return false ;
2053
2076
2054
2077
// Only non-constant remainder as the extra IV is probably not profitable
@@ -2066,6 +2089,23 @@ static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
2066
2089
2067
2090
Loop *L = LI->getLoopFor (LoopIncrPN->getParent ());
2068
2091
Value *Start = LoopIncrPN->getIncomingValueForBlock (L->getLoopPreheader ());
2092
+ // If we have an add, create the initial value for the remainder.
2093
+ // The logic here is:
2094
+ // (urem (add nuw Start, IncrLoopInvariant), RemAmtLoopInvariant)
2095
+ //
2096
+ // Only proceed if the expression simplifies (otherwise we can't fully
2097
+ // optimize out the urem).
2098
+ if (AddInst) {
2099
+ assert (AddOffset && " We found an add but missing values" );
2100
+ // Without dom-condition/assumption cache we aren't likely to get much out
2101
+ // of a context instruction.
2102
+ Start = simplifyAddInst (Start, AddOffset,
2103
+ match (AddInst, m_NSWAdd (m_Value (), m_Value ())),
2104
+ /* IsNUW=*/ true , *DL);
2105
+ if (!Start)
2106
+ return false ;
2107
+ }
2108
+
2069
2109
// If we can't fully optimize out the `rem`, skip this transform.
2070
2110
Start = simplifyURemInst (Start, RemAmt, *DL);
2071
2111
if (!Start)
@@ -2093,9 +2133,12 @@ static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
2093
2133
FreshBBs.insert (LoopIncrPN->getParent ());
2094
2134
FreshBBs.insert (L->getLoopLatch ());
2095
2135
FreshBBs.insert (Rem->getParent ());
2096
-
2136
+ if (AddInst)
2137
+ FreshBBs.insert (cast<Instruction>(AddInst)->getParent ());
2097
2138
replaceAllUsesWith (Rem, NewRem, FreshBBs, IsHuge);
2098
2139
Rem->eraseFromParent ();
2140
+ if (AddInst && AddInst->use_empty ())
2141
+ cast<Instruction>(AddInst)->eraseFromParent ();
2099
2142
return true ;
2100
2143
}
2101
2144
0 commit comments