@@ -471,6 +471,7 @@ class CodeGenPrepare {
471
471
bool replaceMathCmpWithIntrinsic (BinaryOperator *BO, Value *Arg0, Value *Arg1,
472
472
CmpInst *Cmp, Intrinsic::ID IID);
473
473
bool optimizeCmp (CmpInst *Cmp, ModifyDT &ModifiedDT);
474
+ bool optimizeRem (Instruction *Rem);
474
475
bool combineToUSubWithOverflow (CmpInst *Cmp, ModifyDT &ModifiedDT);
475
476
bool combineToUAddWithOverflow (CmpInst *Cmp, ModifyDT &ModifiedDT);
476
477
void verifyBFIUpdates (Function &F);
@@ -1974,6 +1975,161 @@ static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
1974
1975
return true ;
1975
1976
}
1976
1977
1978
+ static bool isRemOfLoopIncrementWithLIV (Value *Rem, const LoopInfo *LI,
1979
+ Value *&RemAmtOut,
1980
+ std::optional<bool > &AddOrSubOut,
1981
+ Value *&AddOrSubOffsetOut,
1982
+ PHINode *&LoopIncrPNOut) {
1983
+ Value *Incr, *RemAmt;
1984
+ if (!isa<Instruction>(Rem))
1985
+ return false ;
1986
+ // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
1987
+ if (!match (Rem, m_URem (m_Value (Incr), m_Value (RemAmt))))
1988
+ return false ;
1989
+
1990
+ // Only trivially analyzable loops.
1991
+ Loop *L = LI->getLoopFor (cast<Instruction>(Rem)->getParent ());
1992
+ if (L == nullptr || L->getLoopPreheader () == nullptr ||
1993
+ L->getLoopLatch () == nullptr )
1994
+ return false ;
1995
+
1996
+ std::optional<bool > AddOrSub;
1997
+ Value *AddOrSubOffset;
1998
+ // Find out loop increment PHI.
1999
+ PHINode *PN = dyn_cast<PHINode>(Incr);
2000
+ if (PN != nullptr ) {
2001
+ AddOrSub = std::nullopt;
2002
+ AddOrSubOffset = nullptr ;
2003
+ } else {
2004
+ // Search through a NUW add/sub.
2005
+ Value *V0, *V1;
2006
+ if (match (Incr, m_NUWAddLike (m_Value (V0), m_Value (V1))))
2007
+ AddOrSub = true ;
2008
+ else if (match (Incr, m_NUWSub (m_Value (V0), m_Value (V1))))
2009
+ AddOrSub = false ;
2010
+ else
2011
+ return false ;
2012
+
2013
+ PN = dyn_cast<PHINode>(V0);
2014
+ if (PN != nullptr ) {
2015
+ AddOrSubOffset = V1;
2016
+ } else if (*AddOrSub) {
2017
+ PN = dyn_cast<PHINode>(V1);
2018
+ AddOrSubOffset = V0;
2019
+ }
2020
+ }
2021
+
2022
+ if (PN == nullptr )
2023
+ return false ;
2024
+
2025
+ // This isn't strictly necessary, what we really need is one increment and any
2026
+ // amount of initial values all being the same.
2027
+ if (PN->getNumIncomingValues () != 2 )
2028
+ return false ;
2029
+
2030
+ // Only works if the remainder amount is a loop invaraint
2031
+ if (!L->isLoopInvariant (RemAmt))
2032
+ return false ;
2033
+
2034
+ // Is the PHI a loop increment?
2035
+ auto LoopIncrInfo = getIVIncrement (PN, LI);
2036
+ if (!LoopIncrInfo.has_value ())
2037
+ return false ;
2038
+
2039
+ // We need remainder_amount % increment_amount to be zero. Increment of one
2040
+ // satisfies that without any special logic and is overwhelmingly the common
2041
+ // case.
2042
+ if (!match (LoopIncrInfo->second , m_One ()))
2043
+ return false ;
2044
+
2045
+ // Need the increment to not overflow.
2046
+ if (!match (LoopIncrInfo->first , m_NUWAdd (m_Value (), m_Value ())))
2047
+ return false ;
2048
+
2049
+ if (PN->getBasicBlockIndex (L->getLoopLatch ()) < 0 ||
2050
+ PN->getBasicBlockIndex (L->getLoopPreheader ()) < 0 )
2051
+ return false ;
2052
+
2053
+ // Set output variables.
2054
+ RemAmtOut = RemAmt;
2055
+ LoopIncrPNOut = PN;
2056
+ AddOrSubOut = AddOrSub;
2057
+ AddOrSubOffsetOut = AddOrSubOffset;
2058
+
2059
+ return true ;
2060
+ }
2061
+
2062
+ // Try to transform:
2063
+ //
2064
+ // for(i = Start; i < End; ++i)
2065
+ // Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
2066
+ //
2067
+ // ->
2068
+ //
2069
+ // Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2070
+ // for(i = Start; i < End; ++i, ++rem)
2071
+ // Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
2072
+ //
2073
+ // Currently only implemented for `Start` and `IncrLoopInvariant` being zero.
2074
+ static bool foldURemOfLoopIncrement (Instruction *Rem, const LoopInfo *LI,
2075
+ SmallSet<BasicBlock *, 32 > &FreshBBs,
2076
+ bool IsHuge) {
2077
+ std::optional<bool > AddOrSub;
2078
+ Value *AddOrSubOffset, *RemAmt;
2079
+ PHINode *LoopIncrPN;
2080
+ if (!isRemOfLoopIncrementWithLIV (Rem, LI, RemAmt, AddOrSub, AddOrSubOffset,
2081
+ LoopIncrPN))
2082
+ return false ;
2083
+
2084
+ // Only non-constant remainder as the extra IV is is probably not profitable
2085
+ // in that case. Further, since remainder amount is non-constant, only handle
2086
+ // case where `IncrLoopInvariant` and `Start` are 0 to entirely eliminate the
2087
+ // rem (as opposed to just hoisting it outside of the loop).
2088
+ //
2089
+ // Potential TODO: Should we have a check for how "nested" this remainder
2090
+ // operation is? The new code runs every iteration so if the remainder is
2091
+ // guarded behind unlikely conditions this might not be worth it.
2092
+ if (AddOrSub.has_value () || match (RemAmt, m_ImmConstant ()))
2093
+ return false ;
2094
+ Loop *L = LI->getLoopFor (Rem->getParent ());
2095
+ if (!match (LoopIncrPN->getIncomingValueForBlock (L->getLoopPreheader ()),
2096
+ m_Zero ()))
2097
+ return false ;
2098
+
2099
+ // Create new remainder with induction variable.
2100
+ Type *Ty = Rem->getType ();
2101
+ IRBuilder<> Builder (Rem->getContext ());
2102
+
2103
+ Builder.SetInsertPoint (LoopIncrPN);
2104
+ PHINode *NewRem = Builder.CreatePHI (Ty, 2 );
2105
+
2106
+ Builder.SetInsertPoint (cast<Instruction>(
2107
+ LoopIncrPN->getIncomingValueForBlock (L->getLoopLatch ())));
2108
+ // `(add (urem x, y), 1)` is always nuw.
2109
+ Value *RemAdd = Builder.CreateNUWAdd (NewRem, ConstantInt::get (Ty, 1 ));
2110
+ Value *RemCmp = Builder.CreateICmp (ICmpInst::ICMP_EQ, RemAdd, RemAmt);
2111
+ Value *RemSel =
2112
+ Builder.CreateSelect (RemCmp, Constant::getNullValue (Ty), RemAdd);
2113
+
2114
+ NewRem->addIncoming (Constant::getNullValue (Ty), L->getLoopPreheader ());
2115
+ NewRem->addIncoming (RemSel, L->getLoopLatch ());
2116
+
2117
+ // Insert all touched BBs.
2118
+ FreshBBs.insert (LoopIncrPN->getParent ());
2119
+ FreshBBs.insert (L->getLoopLatch ());
2120
+ FreshBBs.insert (Rem->getParent ());
2121
+
2122
+ replaceAllUsesWith (Rem, NewRem, FreshBBs, IsHuge);
2123
+ Rem->eraseFromParent ();
2124
+ return true ;
2125
+ }
2126
+
2127
+ bool CodeGenPrepare::optimizeRem (Instruction *Rem) {
2128
+ if (foldURemOfLoopIncrement (Rem, LI, FreshBBs, IsHugeFunc))
2129
+ return true ;
2130
+ return false ;
2131
+ }
2132
+
1977
2133
bool CodeGenPrepare::optimizeCmp (CmpInst *Cmp, ModifyDT &ModifiedDT) {
1978
2134
if (sinkCmpExpression (Cmp, *TLI))
1979
2135
return true ;
@@ -8360,6 +8516,11 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8360
8516
if (optimizeCmp (Cmp, ModifiedDT))
8361
8517
return true ;
8362
8518
8519
+ if (match (I, m_URem (m_Value (), m_Value ())) ||
8520
+ match (I, m_SRem (m_Value (), m_Value ())))
8521
+ if (optimizeRem (I))
8522
+ return true ;
8523
+
8363
8524
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
8364
8525
LI->setMetadata (LLVMContext::MD_invariant_group, nullptr );
8365
8526
bool Modified = optimizeLoadExt (LI);
0 commit comments