Skip to content

Commit ea39f97

Browse files
authored
Revert "[LoopIdiom] Support 'shift until less-than' idiom (#95002)" (#98065)
Reverts #95002 while I investigate a buildbot failure. This reverts commit 83b01aa.
1 parent d528537 commit ea39f97

File tree

2 files changed: 36 additions (+36) and 1033 deletions (-1033).

llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Lines changed: 36 additions & 255 deletions
Original file line number | Diff line number | Diff line change
@@ -231,19 +231,12 @@ class LoopIdiomRecognize {
231231
bool recognizePopcount();
232232
void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
233233
PHINode *CntPhi, Value *Var);
234-
bool isProfitableToInsertFFS(Intrinsic::ID IntrinID, Value *InitX,
235-
bool ZeroCheck, size_t CanonicalSize);
236-
bool insertFFSIfProfitable(Intrinsic::ID IntrinID, Value *InitX,
237-
Instruction *DefX, PHINode *CntPhi,
238-
Instruction *CntInst);
239234
bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
240-
bool recognizeShiftUntilLessThan();
241235
void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
242236
Instruction *CntInst, PHINode *CntPhi,
243237
Value *Var, Instruction *DefX,
244238
const DebugLoc &DL, bool ZeroCheck,
245-
bool IsCntPhiUsedOutsideLoop,
246-
bool InsertSub = false);
239+
bool IsCntPhiUsedOutsideLoop);
247240

248241
bool recognizeShiftUntilBitTest();
249242
bool recognizeShiftUntilZero();
@@ -1489,8 +1482,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
14891482
<< CurLoop->getHeader()->getName() << "\n");
14901483

14911484
return recognizePopcount() || recognizeAndInsertFFS() ||
1492-
recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
1493-
recognizeShiftUntilLessThan();
1485+
recognizeShiftUntilBitTest() || recognizeShiftUntilZero();
14941486
}
14951487

14961488
/// Check if the given conditional branch is based on the comparison between
@@ -1525,34 +1517,6 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
15251517
return nullptr;
15261518
}
15271519

1528-
/// Check if the given conditional branch is based on an unsigned less-than
1529-
/// comparison between a variable and a constant, and if the comparison is false
1530-
/// the control yields to the loop entry. If the branch matches the behaviour,
1531-
/// the variable involved in the comparison is returned.
1532-
static Value *matchShiftULTCondition(BranchInst *BI, BasicBlock *LoopEntry,
1533-
uint64_t &Threshold) {
1534-
if (!BI || !BI->isConditional())
1535-
return nullptr;
1536-
1537-
ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
1538-
if (!Cond)
1539-
return nullptr;
1540-
1541-
ConstantInt *CmpConst = dyn_cast<ConstantInt>(Cond->getOperand(1));
1542-
if (!CmpConst)
1543-
return nullptr;
1544-
1545-
BasicBlock *FalseSucc = BI->getSuccessor(1);
1546-
ICmpInst::Predicate Pred = Cond->getPredicate();
1547-
1548-
if (Pred == ICmpInst::ICMP_ULT && FalseSucc == LoopEntry) {
1549-
Threshold = CmpConst->getZExtValue();
1550-
return Cond->getOperand(0);
1551-
}
1552-
1553-
return nullptr;
1554-
}
1555-
15561520
// Check if the recurrence variable `VarX` is in the right form to create
15571521
// the idiom. Returns the value coerced to a PHINode if so.
15581522
static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
@@ -1564,107 +1528,6 @@ static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
15641528
return nullptr;
15651529
}
15661530

1567-
/// Return true if the idiom is detected in the loop.
1568-
///
1569-
/// Additionally:
1570-
/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
1571-
/// or nullptr if there is no such.
1572-
/// 2) \p CntPhi is set to the corresponding phi node
1573-
/// or nullptr if there is no such.
1574-
/// 3) \p InitX is set to the value whose CTLZ could be used.
1575-
/// 4) \p DefX is set to the instruction calculating Loop exit condition.
1576-
/// 5) \p Threshold is set to the constant involved in the unsigned less-than
1577-
/// comparison.
1578-
///
1579-
/// The core idiom we are trying to detect is:
1580-
/// \code
1581-
/// if (x0 < 2)
1582-
/// goto loop-exit // the precondition of the loop
1583-
/// cnt0 = init-val
1584-
/// do {
1585-
/// x = phi (x0, x.next); //PhiX
1586-
/// cnt = phi (cnt0, cnt.next)
1587-
///
1588-
/// cnt.next = cnt + 1;
1589-
/// ...
1590-
/// x.next = x >> 1; // DefX
1591-
/// } while (x >= 4)
1592-
/// loop-exit:
1593-
/// \endcode
1594-
static bool detectShiftUntilLessThanIdiom(Loop *CurLoop, const DataLayout &DL,
1595-
Intrinsic::ID &IntrinID,
1596-
Value *&InitX, Instruction *&CntInst,
1597-
PHINode *&CntPhi, Instruction *&DefX,
1598-
uint64_t &Threshold) {
1599-
BasicBlock *LoopEntry;
1600-
1601-
DefX = nullptr;
1602-
CntInst = nullptr;
1603-
CntPhi = nullptr;
1604-
LoopEntry = *(CurLoop->block_begin());
1605-
1606-
// step 1: Check if the loop-back branch is in desirable form.
1607-
if (Value *T = matchShiftULTCondition(
1608-
dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry,
1609-
Threshold))
1610-
DefX = dyn_cast<Instruction>(T);
1611-
else
1612-
return false;
1613-
1614-
// step 2: Check the recurrence of variable X
1615-
if (!DefX || !isa<PHINode>(DefX))
1616-
return false;
1617-
1618-
PHINode *VarPhi = cast<PHINode>(DefX);
1619-
int Idx = VarPhi->getBasicBlockIndex(LoopEntry);
1620-
if (Idx == -1)
1621-
return false;
1622-
1623-
DefX = dyn_cast<Instruction>(VarPhi->getIncomingValue(Idx));
1624-
if (!DefX || DefX->getNumOperands() == 0 || DefX->getOperand(0) != VarPhi)
1625-
return false;
1626-
1627-
// step 3: detect instructions corresponding to "x.next = x >> 1"
1628-
if (DefX->getOpcode() != Instruction::LShr)
1629-
return false;
1630-
1631-
IntrinID = Intrinsic::ctlz;
1632-
ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
1633-
if (!Shft || !Shft->isOne())
1634-
return false;
1635-
1636-
InitX = VarPhi->getIncomingValueForBlock(CurLoop->getLoopPreheader());
1637-
1638-
// step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
1639-
// or cnt.next = cnt + -1.
1640-
// TODO: We can skip the step. If loop trip count is known (CTLZ),
1641-
// then all uses of "cnt.next" could be optimized to the trip count
1642-
// plus "cnt0". Currently it is not optimized.
1643-
// This step could be used to detect POPCNT instruction:
1644-
// cnt.next = cnt + (x.next & 1)
1645-
for (Instruction &Inst : llvm::make_range(
1646-
LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
1647-
if (Inst.getOpcode() != Instruction::Add)
1648-
continue;
1649-
1650-
ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
1651-
if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
1652-
continue;
1653-
1654-
PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
1655-
if (!Phi)
1656-
continue;
1657-
1658-
CntInst = &Inst;
1659-
CntPhi = Phi;
1660-
break;
1661-
}
1662-
if (!CntInst)
1663-
return false;
1664-
1665-
return true;
1666-
}
1667-
16681531
/// Return true iff the idiom is detected in the loop.
16691532
///
16701533
/// Additionally:
@@ -1893,35 +1756,27 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
18931756
return true;
18941757
}
18951758

1896-
// Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1897-
// profitable if we delete the loop.
1898-
bool LoopIdiomRecognize::isProfitableToInsertFFS(Intrinsic::ID IntrinID,
1899-
Value *InitX, bool ZeroCheck,
1900-
size_t CanonicalSize) {
1901-
const Value *Args[] = {InitX,
1902-
ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
1759+
/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1760+
/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
1761+
/// trip count returns true; otherwise, returns false.
1762+
bool LoopIdiomRecognize::recognizeAndInsertFFS() {
1763+
// Give up if the loop has multiple blocks or multiple backedges.
1764+
if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1765+
return false;
19031766

1904-
// @llvm.dbg doesn't count as they have no semantic effect.
1905-
auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
1906-
uint32_t HeaderSize =
1907-
std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
1767+
Intrinsic::ID IntrinID;
1768+
Value *InitX;
1769+
Instruction *DefX = nullptr;
1770+
PHINode *CntPhi = nullptr;
1771+
Instruction *CntInst = nullptr;
1772+
// Help decide if transformation is profitable. For ShiftUntilZero idiom,
1773+
// this is always 6.
1774+
size_t IdiomCanonicalSize = 6;
19081775

1909-
IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
1910-
InstructionCost Cost = TTI->getIntrinsicInstrCost(
1911-
Attrs, TargetTransformInfo::TCK_SizeAndLatency);
1912-
if (HeaderSize != CanonicalSize && Cost > TargetTransformInfo::TCC_Basic)
1776+
if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
1777+
CntInst, CntPhi, DefX))
19131778
return false;
19141779

1915-
return true;
1916-
}
1917-
1918-
/// Convert CTLZ / CTTZ idiom loop into countable loop.
1919-
/// If CTLZ / CTTZ inserted as a new trip count returns true; otherwise,
1920-
/// returns false.
1921-
bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,
1922-
Value *InitX, Instruction *DefX,
1923-
PHINode *CntPhi,
1924-
Instruction *CntInst) {
19251780
bool IsCntPhiUsedOutsideLoop = false;
19261781
for (User *U : CntPhi->users())
19271782
if (!CurLoop->contains(cast<Instruction>(U))) {
@@ -1963,107 +1818,35 @@ bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,
19631818
ZeroCheck = true;
19641819
}
19651820

1966-
// FFS idiom loop has only 6 instructions:
1821+
// Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1822+
// profitable if we delete the loop.
1823+
1824+
// the loop has only 6 instructions:
19671825
// %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
19681826
// %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
19691827
// %shr = ashr %n.addr.0, 1
19701828
// %tobool = icmp eq %shr, 0
19711829
// %inc = add nsw %i.0, 1
19721830
// br i1 %tobool
1973-
size_t IdiomCanonicalSize = 6;
1974-
if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
1975-
return false;
1976-
1977-
transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1978-
DefX->getDebugLoc(), ZeroCheck,
1979-
IsCntPhiUsedOutsideLoop);
1980-
return true;
1981-
}
1982-
1983-
/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1984-
/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
1985-
/// trip count returns true; otherwise, returns false.
1986-
bool LoopIdiomRecognize::recognizeAndInsertFFS() {
1987-
// Give up if the loop has multiple blocks or multiple backedges.
1988-
if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
1989-
return false;
1990-
1991-
Intrinsic::ID IntrinID;
1992-
Value *InitX;
1993-
Instruction *DefX = nullptr;
1994-
PHINode *CntPhi = nullptr;
1995-
Instruction *CntInst = nullptr;
1996-
1997-
if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX, CntInst, CntPhi,
1998-
DefX))
1999-
return false;
20001831

2001-
return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
2002-
}
2003-
2004-
bool LoopIdiomRecognize::recognizeShiftUntilLessThan() {
2005-
// Give up if the loop has multiple blocks or multiple backedges.
2006-
if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
2007-
return false;
2008-
2009-
Intrinsic::ID IntrinID;
2010-
Value *InitX;
2011-
Instruction *DefX = nullptr;
2012-
PHINode *CntPhi = nullptr;
2013-
Instruction *CntInst = nullptr;
2014-
2015-
uint64_t LoopThreshold;
2016-
if (!detectShiftUntilLessThanIdiom(CurLoop, *DL, IntrinID, InitX, CntInst,
2017-
CntPhi, DefX, LoopThreshold))
2018-
return false;
2019-
2020-
if (LoopThreshold == 2) {
2021-
// Treat as regular FFS.
2022-
return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
2023-
}
2024-
2025-
// Look for Floor Log2 Idiom.
2026-
if (LoopThreshold != 4)
2027-
return false;
2028-
2029-
// Abort if CntPhi is used outside of the loop.
2030-
for (User *U : CntPhi->users())
2031-
if (!CurLoop->contains(cast<Instruction>(U)))
2032-
return false;
2033-
2034-
// It is safe to assume Preheader exist as it was checked in
2035-
// parent function RunOnLoop.
2036-
BasicBlock *PH = CurLoop->getLoopPreheader();
2037-
auto *PreCondBB = PH->getSinglePredecessor();
2038-
if (!PreCondBB)
2039-
return false;
2040-
auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
2041-
if (!PreCondBI)
2042-
return false;
2043-
2044-
uint64_t PreLoopThreshold;
2045-
if (matchShiftULTCondition(PreCondBI, PH, PreLoopThreshold) != InitX ||
2046-
PreLoopThreshold != 2)
2047-
return false;
1832+
const Value *Args[] = {InitX,
1833+
ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
20481834

2049-
bool ZeroCheck = true;
1835+
// @llvm.dbg doesn't count as they have no semantic effect.
1836+
auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
1837+
uint32_t HeaderSize =
1838+
std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
20501839

2051-
// the loop has only 6 instructions:
2052-
// %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
2053-
// %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
2054-
// %shr = ashr %n.addr.0, 1
2055-
// %tobool = icmp ult %n.addr.0, C
2056-
// %inc = add nsw %i.0, 1
2057-
// br i1 %tobool
2058-
size_t IdiomCanonicalSize = 6;
2059-
if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
1840+
IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
1841+
InstructionCost Cost =
1842+
TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
1843+
if (HeaderSize != IdiomCanonicalSize &&
1844+
Cost > TargetTransformInfo::TCC_Basic)
20601845
return false;
20611846

2062-
// log2(x) = w − 1 − clz(x)
20631847
transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
20641848
DefX->getDebugLoc(), ZeroCheck,
2065-
/*IsCntPhiUsedOutsideLoop=*/false,
2066-
/*InsertSub=*/true);
1849+
IsCntPhiUsedOutsideLoop);
20671850
return true;
20681851
}
20691852

@@ -2178,7 +1961,7 @@ static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
21781961
void LoopIdiomRecognize::transformLoopToCountable(
21791962
Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
21801963
PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
2181-
bool ZeroCheck, bool IsCntPhiUsedOutsideLoop, bool InsertSub) {
1964+
bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
21821965
BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
21831966

21841967
// Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
@@ -2208,8 +1991,6 @@ void LoopIdiomRecognize::transformLoopToCountable(
22081991
Type *CountTy = Count->getType();
22091992
Count = Builder.CreateSub(
22101993
ConstantInt::get(CountTy, CountTy->getIntegerBitWidth()), Count);
2211-
if (InsertSub)
2212-
Count = Builder.CreateSub(Count, ConstantInt::get(CountTy, 1));
22131994
Value *NewCount = Count;
22141995
if (IsCntPhiUsedOutsideLoop)
22151996
Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));

0 commit comments

Comments
 (0)