@@ -1976,17 +1976,43 @@ static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
1976
1976
return true ;
1977
1977
}
1978
1978
1979
- static bool isRemOfLoopIncrementWithLoopInvariant (Instruction *Rem,
1980
- const LoopInfo *LI,
1981
- Value *&RemAmtOut ,
1982
- PHINode *&LoopIncrPNOut) {
1979
+ static bool isRemOfLoopIncrementWithLoopInvariant (
1980
+ Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut ,
1981
+ std::optional< bool > &AddOrSubOut, Value *&AddOrSubInstOut ,
1982
+ Value *&AddOrSubOffsetOut, PHINode *&LoopIncrPNOut) {
1983
1983
Value *Incr, *RemAmt;
1984
1984
// NB: If RemAmt is a power of 2 it *should* have been transformed by now.
1985
1985
if (!match (Rem, m_URem (m_Value (Incr), m_Value (RemAmt))))
1986
1986
return false ;
1987
1987
1988
+ std::optional<bool > AddOrSub;
1989
+ Value *AddOrSubOffset;
1988
1990
// Find out loop increment PHI.
1989
1991
auto *PN = dyn_cast<PHINode>(Incr);
1992
+ if (PN != nullptr ) {
1993
+ AddOrSub = std::nullopt;
1994
+ AddOrSubOffset = nullptr ;
1995
+ } else {
1996
+ // Search through a NUW add/sub on top of the loop increment.
1997
+ Value *V0, *V1;
1998
+ if (match (Incr, m_NUWAddLike (m_Value (V0), m_Value (V1))))
1999
+ AddOrSub = true ;
2000
+ else if (match (Incr, m_NUWSub (m_Value (V0), m_Value (V1))))
2001
+ AddOrSub = false ;
2002
+ else
2003
+ return false ;
2004
+
2005
+ AddOrSubInstOut = Incr;
2006
+
2007
+ PN = dyn_cast<PHINode>(V0);
2008
+ if (PN != nullptr ) {
2009
+ AddOrSubOffset = V1;
2010
+ } else if (*AddOrSub) {
2011
+ PN = dyn_cast<PHINode>(V1);
2012
+ AddOrSubOffset = V0;
2013
+ }
2014
+ }
2015
+
1990
2016
if (!PN)
1991
2017
return false ;
1992
2018
@@ -2026,6 +2052,8 @@ static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem,
2026
2052
// Set output variables.
2027
2053
RemAmtOut = RemAmt;
2028
2054
LoopIncrPNOut = PN;
2055
+ AddOrSubOut = AddOrSub;
2056
+ AddOrSubOffsetOut = AddOrSubOffset;
2029
2057
2030
2058
return true ;
2031
2059
}
@@ -2040,15 +2068,15 @@ static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem,
2040
2068
// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2041
2069
// for(i = Start; i < End; ++i, ++rem)
2042
2070
// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
2043
- //
2044
- // Currently only implemented for `IncrLoopInvariant` being zero.
2045
2071
static bool foldURemOfLoopIncrement (Instruction *Rem, const DataLayout *DL,
2046
2072
const LoopInfo *LI,
2047
2073
SmallSet<BasicBlock *, 32 > &FreshBBs,
2048
2074
bool IsHuge) {
2049
- Value *RemAmt;
2075
+ std::optional<bool > AddOrSub;
2076
+ Value *AddOrSubOffset, *RemAmt, *AddOrSubInst;
2050
2077
PHINode *LoopIncrPN;
2051
- if (!isRemOfLoopIncrementWithLoopInvariant (Rem, LI, RemAmt, LoopIncrPN))
2078
+ if (!isRemOfLoopIncrementWithLoopInvariant (
2079
+ Rem, LI, RemAmt, AddOrSub, AddOrSubInst, AddOrSubOffset, LoopIncrPN))
2052
2080
return false ;
2053
2081
2054
2082
// Only non-constant remainder as the extra IV is probably not profitable
@@ -2066,6 +2094,29 @@ static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
2066
2094
2067
2095
Loop *L = LI->getLoopFor (LoopIncrPN->getParent ());
2068
2096
Value *Start = LoopIncrPN->getIncomingValueForBlock (L->getLoopPreheader ());
2097
+ // If we have an add/sub, create the initial value for the remainder.
2098
+ // The logic here is:
2099
+ // (urem (add/sub nuw Start, IncrLoopInvariant), RemAmtLoopInvariant)
2100
+ //
2101
+ // Only proceed if the expression simplifies (otherwise we can't fully
2102
+ // optimize out the urem).
2103
+ if (AddOrSub) {
2104
+ assert (AddOrSubOffset && AddOrSubInst &&
2105
+ " We found an add/sub but missing values" );
2106
+ // Without dom-condition/assumption cache we aren't likely to get much out
2107
+ // of a context instruction.
2108
+ const SimplifyQuery Q (*DL);
2109
+ bool NSW = cast<OverflowingBinaryOperator>(AddOrSubInst)->hasNoSignedWrap ();
2110
+ if (*AddOrSub)
2111
+ Start = simplifyAddInst (Start, AddOrSubOffset, /* IsNSW=*/ NSW,
2112
+ /* IsNUW=*/ true , Q);
2113
+ else
2114
+ Start = simplifySubInst (Start, AddOrSubOffset, /* IsNSW=*/ NSW,
2115
+ /* IsNUW=*/ true , Q);
2116
+ if (!Start)
2117
+ return false ;
2118
+ }
2119
+
2069
2120
// If we can't fully optimize out the `rem`, skip this transform.
2070
2121
Start = simplifyURemInst (Start, RemAmt, *DL);
2071
2122
if (!Start)
@@ -2096,6 +2147,8 @@ static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
2096
2147
2097
2148
replaceAllUsesWith (Rem, NewRem, FreshBBs, IsHuge);
2098
2149
Rem->eraseFromParent ();
2150
+ if (AddOrSubInst && AddOrSubInst->use_empty ())
2151
+ cast<Instruction>(AddOrSubInst)->eraseFromParent ();
2099
2152
return true ;
2100
2153
}
2101
2154
0 commit comments