@@ -1976,17 +1976,43 @@ static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
1976
1976
return true ;
1977
1977
}
1978
1978
1979
- static bool isRemOfLoopIncrementWithLoopInvariant (Instruction *Rem,
1980
- const LoopInfo *LI,
1981
- Value *&RemAmtOut ,
1982
- PHINode *&LoopIncrPNOut) {
1979
+ static bool isRemOfLoopIncrementWithLoopInvariant (
1980
+ Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, bool &AddOrSubOut ,
1981
+ Value *&AddOrSubInstOut, Value *&AddOrSubOffsetOut ,
1982
+ PHINode *&LoopIncrPNOut) {
1983
1983
Value *Incr, *RemAmt;
1984
1984
// NB: If RemAmt is a power of 2 it *should* have been transformed by now.
1985
1985
if (!match (Rem, m_URem (m_Value (Incr), m_Value (RemAmt))))
1986
1986
return false ;
1987
1987
1988
+ bool AddOrSub = false ;
1989
+ Value *AddOrSubOffset;
1988
1990
// Find out loop increment PHI.
1989
1991
auto *PN = dyn_cast<PHINode>(Incr);
1992
+ if (PN != nullptr ) {
1993
+ AddOrSub = false ;
1994
+ AddOrSubOffset = nullptr ;
1995
+ } else {
1996
+ // Search through a NUW add/sub on top of the loop increment.
1997
+ Value *V0, *V1;
1998
+ bool Add = match (Incr, m_NUWAddLike (m_Value (V0), m_Value (V1)));
1999
+ bool Sub = match (Incr, m_NUWSub (m_Value (V0), m_Value (V1)));
2000
+ if (!Add && !Sub)
2001
+ return false ;
2002
+
2003
+ AddOrSub = true ;
2004
+
2005
+ AddOrSubInstOut = Incr;
2006
+
2007
+ PN = dyn_cast<PHINode>(V0);
2008
+ if (PN != nullptr ) {
2009
+ AddOrSubOffset = V1;
2010
+ } else if (Add) {
2011
+ PN = dyn_cast<PHINode>(V1);
2012
+ AddOrSubOffset = V0;
2013
+ }
2014
+ }
2015
+
1990
2016
if (!PN)
1991
2017
return false ;
1992
2018
@@ -2026,6 +2052,8 @@ static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem,
2026
2052
// Set output variables.
2027
2053
RemAmtOut = RemAmt;
2028
2054
LoopIncrPNOut = PN;
2055
+ AddOrSubOut = AddOrSub;
2056
+ AddOrSubOffsetOut = AddOrSubOffset;
2029
2057
2030
2058
return true ;
2031
2059
}
@@ -2040,15 +2068,15 @@ static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem,
2040
2068
// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2041
2069
// for(i = Start; i < End; ++i, ++rem)
2042
2070
// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
2043
- //
2044
- // Currently only implemented for `IncrLoopInvariant` being zero.
2045
2071
static bool foldURemOfLoopIncrement (Instruction *Rem, const DataLayout *DL,
2046
2072
const LoopInfo *LI,
2047
2073
SmallSet<BasicBlock *, 32 > &FreshBBs,
2048
2074
bool IsHuge) {
2049
- Value *RemAmt;
2075
+ bool AddOrSub;
2076
+ Value *AddOrSubOffset, *RemAmt, *AddOrSubInst;
2050
2077
PHINode *LoopIncrPN;
2051
- if (!isRemOfLoopIncrementWithLoopInvariant (Rem, LI, RemAmt, LoopIncrPN))
2078
+ if (!isRemOfLoopIncrementWithLoopInvariant (
2079
+ Rem, LI, RemAmt, AddOrSub, AddOrSubInst, AddOrSubOffset, LoopIncrPN))
2052
2080
return false ;
2053
2081
2054
2082
// Only non-constant remainder as the extra IV is probably not profitable
@@ -2066,6 +2094,43 @@ static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
2066
2094
2067
2095
Loop *L = LI->getLoopFor (LoopIncrPN->getParent ());
2068
2096
Value *Start = LoopIncrPN->getIncomingValueForBlock (L->getLoopPreheader ());
2097
+ // If we have an add/sub, create the initial value for the remainder.
2098
+ // The logic here is:
2099
+ // (urem (add/sub nuw Start, IncrLoopInvariant), RemAmtLoopInvariant)
2100
+ //
2101
+ // Only proceed if the expression simplifies (otherwise we can't fully
2102
+ // optimize out the urem).
2103
+ if (AddOrSub) {
2104
+ assert (AddOrSubOffset && AddOrSubInst &&
2105
+ " We found an add/sub but missing values" );
2106
+ // Without dom-condition/assumption cache we aren't likely to get much out
2107
+ // of a context instruction.
2108
+ const SimplifyQuery Q (*DL);
2109
+ Instruction::BinaryOps Opc =
2110
+ cast<BinaryOperator>(AddOrSubInst)->getOpcode ();
2111
+ switch (Opc) {
2112
+ case Instruction::Add:
2113
+ Start =
2114
+ simplifyAddInst (Start, AddOrSubOffset,
2115
+ match (AddOrSubInst, m_NSWAdd (m_Value (), m_Value ())),
2116
+ /* IsNUW=*/ true , Q);
2117
+ break ;
2118
+ case Instruction::Sub:
2119
+ Start =
2120
+ simplifySubInst (Start, AddOrSubOffset,
2121
+ match (AddOrSubInst, m_NSWSub (m_Value (), m_Value ())),
2122
+ /* IsNUW=*/ true , Q);
2123
+ break ;
2124
+ case Instruction::Or:
2125
+ Start = simplifyOrInst (Start, AddOrSubOffset, Q);
2126
+ break ;
2127
+ default :
2128
+ llvm_unreachable (" Unknown offset instruction" );
2129
+ }
2130
+ if (!Start)
2131
+ return false ;
2132
+ }
2133
+
2069
2134
// If we can't fully optimize out the `rem`, skip this transform.
2070
2135
Start = simplifyURemInst (Start, RemAmt, *DL);
2071
2136
if (!Start)
@@ -2096,6 +2161,8 @@ static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
2096
2161
2097
2162
replaceAllUsesWith (Rem, NewRem, FreshBBs, IsHuge);
2098
2163
Rem->eraseFromParent ();
2164
+ if (AddOrSubInst && AddOrSubInst->use_empty ())
2165
+ cast<Instruction>(AddOrSubInst)->eraseFromParent ();
2099
2166
return true ;
2100
2167
}
2101
2168
0 commit comments