@@ -231,19 +231,12 @@ class LoopIdiomRecognize {
231
231
bool recognizePopcount ();
232
232
void transformLoopToPopcount (BasicBlock *PreCondBB, Instruction *CntInst,
233
233
PHINode *CntPhi, Value *Var);
234
- bool isProfitableToInsertFFS (Intrinsic::ID IntrinID, Value *InitX,
235
- bool ZeroCheck, size_t CanonicalSize);
236
- bool insertFFSIfProfitable (Intrinsic::ID IntrinID, Value *InitX,
237
- Instruction *DefX, PHINode *CntPhi,
238
- Instruction *CntInst);
239
234
bool recognizeAndInsertFFS (); // / Find First Set: ctlz or cttz
240
- bool recognizeShiftUntilLessThan ();
241
235
void transformLoopToCountable (Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
242
236
Instruction *CntInst, PHINode *CntPhi,
243
237
Value *Var, Instruction *DefX,
244
238
const DebugLoc &DL, bool ZeroCheck,
245
- bool IsCntPhiUsedOutsideLoop,
246
- bool InsertSub = false );
239
+ bool IsCntPhiUsedOutsideLoop);
247
240
248
241
bool recognizeShiftUntilBitTest ();
249
242
bool recognizeShiftUntilZero ();
@@ -1489,8 +1482,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
1489
1482
<< CurLoop->getHeader ()->getName () << " \n " );
1490
1483
1491
1484
return recognizePopcount () || recognizeAndInsertFFS () ||
1492
- recognizeShiftUntilBitTest () || recognizeShiftUntilZero () ||
1493
- recognizeShiftUntilLessThan ();
1485
+ recognizeShiftUntilBitTest () || recognizeShiftUntilZero ();
1494
1486
}
1495
1487
1496
1488
// / Check if the given conditional branch is based on the comparison between
@@ -1525,34 +1517,6 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
1525
1517
return nullptr ;
1526
1518
}
1527
1519
1528
- // / Check if the given conditional branch is based on an unsigned less-than
1529
- // / comparison between a variable and a constant, and if the comparison is false
1530
- // / the control yields to the loop entry. If the branch matches the behaviour,
1531
- // / the variable involved in the comparison is returned.
1532
- static Value *matchShiftULTCondition (BranchInst *BI, BasicBlock *LoopEntry,
1533
- uint64_t &Threshold) {
1534
- if (!BI || !BI->isConditional ())
1535
- return nullptr ;
1536
-
1537
- ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition ());
1538
- if (!Cond)
1539
- return nullptr ;
1540
-
1541
- ConstantInt *CmpConst = dyn_cast<ConstantInt>(Cond->getOperand (1 ));
1542
- if (!CmpConst)
1543
- return nullptr ;
1544
-
1545
- BasicBlock *FalseSucc = BI->getSuccessor (1 );
1546
- ICmpInst::Predicate Pred = Cond->getPredicate ();
1547
-
1548
- if (Pred == ICmpInst::ICMP_ULT && FalseSucc == LoopEntry) {
1549
- Threshold = CmpConst->getZExtValue ();
1550
- return Cond->getOperand (0 );
1551
- }
1552
-
1553
- return nullptr ;
1554
- }
1555
-
1556
1520
// Check if the recurrence variable `VarX` is in the right form to create
1557
1521
// the idiom. Returns the value coerced to a PHINode if so.
1558
1522
static PHINode *getRecurrenceVar (Value *VarX, Instruction *DefX,
@@ -1564,107 +1528,6 @@ static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
1564
1528
return nullptr ;
1565
1529
}
1566
1530
1567
- // / Return true if the idiom is detected in the loop.
1568
- // /
1569
- // / Additionally:
1570
- // / 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
1571
- // / or nullptr if there is no such.
1572
- // / 2) \p CntPhi is set to the corresponding phi node
1573
- // / or nullptr if there is no such.
1574
- // / 3) \p InitX is set to the value whose CTLZ could be used.
1575
- // / 4) \p DefX is set to the instruction calculating Loop exit condition.
1576
- // / 5) \p Threshold is set to the constant involved in the unsigned less-than
1577
- // / comparison.
1578
- // /
1579
- // / The core idiom we are trying to detect is:
1580
- // / \code
1581
- // / if (x0 < 2)
1582
- // / goto loop-exit // the precondition of the loop
1583
- // / cnt0 = init-val
1584
- // / do {
1585
- // / x = phi (x0, x.next); //PhiX
1586
- // / cnt = phi (cnt0, cnt.next)
1587
- // /
1588
- // / cnt.next = cnt + 1;
1589
- // / ...
1590
- // / x.next = x >> 1; // DefX
1591
- // / } while (x >= 4)
1592
- // / loop-exit:
1593
- // / \endcode
1594
- static bool detectShiftUntilLessThanIdiom (Loop *CurLoop, const DataLayout &DL,
1595
- Intrinsic::ID &IntrinID,
1596
- Value *&InitX, Instruction *&CntInst,
1597
- PHINode *&CntPhi, Instruction *&DefX,
1598
- uint64_t &Threshold) {
1599
- BasicBlock *LoopEntry;
1600
-
1601
- DefX = nullptr ;
1602
- CntInst = nullptr ;
1603
- CntPhi = nullptr ;
1604
- LoopEntry = *(CurLoop->block_begin ());
1605
-
1606
- // step 1: Check if the loop-back branch is in desirable form.
1607
- if (Value *T = matchShiftULTCondition (
1608
- dyn_cast<BranchInst>(LoopEntry->getTerminator ()), LoopEntry,
1609
- Threshold))
1610
- DefX = dyn_cast<Instruction>(T);
1611
- else
1612
- return false ;
1613
-
1614
- // step 2: Check the recurrence of variable X
1615
- if (!DefX || !isa<PHINode>(DefX))
1616
- return false ;
1617
-
1618
- PHINode *VarPhi = cast<PHINode>(DefX);
1619
- int Idx = VarPhi->getBasicBlockIndex (LoopEntry);
1620
- if (Idx == -1 )
1621
- return false ;
1622
-
1623
- DefX = dyn_cast<Instruction>(VarPhi->getIncomingValue (Idx));
1624
- if (!DefX || DefX->getNumOperands () == 0 || DefX->getOperand (0 ) != VarPhi)
1625
- return false ;
1626
-
1627
- // step 3: detect instructions corresponding to "x.next = x >> 1"
1628
- if (DefX->getOpcode () != Instruction::LShr)
1629
- return false ;
1630
-
1631
- IntrinID = Intrinsic::ctlz;
1632
- ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand (1 ));
1633
- if (!Shft || !Shft->isOne ())
1634
- return false ;
1635
-
1636
- InitX = VarPhi->getIncomingValueForBlock (CurLoop->getLoopPreheader ());
1637
-
1638
- // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1
1639
- // or cnt.next = cnt + -1.
1640
- // TODO: We can skip the step. If loop trip count is known (CTLZ),
1641
- // then all uses of "cnt.next" could be optimized to the trip count
1642
- // plus "cnt0". Currently it is not optimized.
1643
- // This step could be used to detect POPCNT instruction:
1644
- // cnt.next = cnt + (x.next & 1)
1645
- for (Instruction &Inst : llvm::make_range (
1646
- LoopEntry->getFirstNonPHI ()->getIterator (), LoopEntry->end ())) {
1647
- if (Inst.getOpcode () != Instruction::Add)
1648
- continue ;
1649
-
1650
- ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand (1 ));
1651
- if (!Inc || (!Inc->isOne () && !Inc->isMinusOne ()))
1652
- continue ;
1653
-
1654
- PHINode *Phi = getRecurrenceVar (Inst.getOperand (0 ), &Inst, LoopEntry);
1655
- if (!Phi)
1656
- continue ;
1657
-
1658
- CntInst = &Inst;
1659
- CntPhi = Phi;
1660
- break ;
1661
- }
1662
- if (!CntInst)
1663
- return false ;
1664
-
1665
- return true ;
1666
- }
1667
-
1668
1531
// / Return true iff the idiom is detected in the loop.
1669
1532
// /
1670
1533
// / Additionally:
@@ -1893,35 +1756,27 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
1893
1756
return true ;
1894
1757
}
1895
1758
1896
- // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1897
- // profitable if we delete the loop.
1898
- bool LoopIdiomRecognize::isProfitableToInsertFFS (Intrinsic::ID IntrinID,
1899
- Value *InitX, bool ZeroCheck,
1900
- size_t CanonicalSize) {
1901
- const Value *Args[] = {InitX,
1902
- ConstantInt::getBool (InitX-> getContext (), ZeroCheck)} ;
1759
+ // / Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1760
+ // / to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
1761
+ // / trip count returns true; otherwise, returns false.
1762
+ bool LoopIdiomRecognize::recognizeAndInsertFFS () {
1763
+ // Give up if the loop has multiple blocks or multiple backedges.
1764
+ if (CurLoop-> getNumBackEdges () != 1 || CurLoop-> getNumBlocks () != 1 )
1765
+ return false ;
1903
1766
1904
- // @llvm.dbg doesn't count as they have no semantic effect.
1905
- auto InstWithoutDebugIt = CurLoop->getHeader ()->instructionsWithoutDebug ();
1906
- uint32_t HeaderSize =
1907
- std::distance (InstWithoutDebugIt.begin (), InstWithoutDebugIt.end ());
1767
+ Intrinsic::ID IntrinID;
1768
+ Value *InitX;
1769
+ Instruction *DefX = nullptr ;
1770
+ PHINode *CntPhi = nullptr ;
1771
+ Instruction *CntInst = nullptr ;
1772
+ // Help decide if transformation is profitable. For ShiftUntilZero idiom,
1773
+ // this is always 6.
1774
+ size_t IdiomCanonicalSize = 6 ;
1908
1775
1909
- IntrinsicCostAttributes Attrs (IntrinID, InitX->getType (), Args);
1910
- InstructionCost Cost = TTI->getIntrinsicInstrCost (
1911
- Attrs, TargetTransformInfo::TCK_SizeAndLatency);
1912
- if (HeaderSize != CanonicalSize && Cost > TargetTransformInfo::TCC_Basic)
1776
+ if (!detectShiftUntilZeroIdiom (CurLoop, *DL, IntrinID, InitX,
1777
+ CntInst, CntPhi, DefX))
1913
1778
return false ;
1914
1779
1915
- return true ;
1916
- }
1917
-
1918
- // / Convert CTLZ / CTTZ idiom loop into countable loop.
1919
- // / If CTLZ / CTTZ inserted as a new trip count returns true; otherwise,
1920
- // / returns false.
1921
- bool LoopIdiomRecognize::insertFFSIfProfitable (Intrinsic::ID IntrinID,
1922
- Value *InitX, Instruction *DefX,
1923
- PHINode *CntPhi,
1924
- Instruction *CntInst) {
1925
1780
bool IsCntPhiUsedOutsideLoop = false ;
1926
1781
for (User *U : CntPhi->users ())
1927
1782
if (!CurLoop->contains (cast<Instruction>(U))) {
@@ -1963,107 +1818,35 @@ bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,
1963
1818
ZeroCheck = true ;
1964
1819
}
1965
1820
1966
- // FFS idiom loop has only 6 instructions:
1821
+ // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
1822
+ // profitable if we delete the loop.
1823
+
1824
+ // the loop has only 6 instructions:
1967
1825
// %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
1968
1826
// %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
1969
1827
// %shr = ashr %n.addr.0, 1
1970
1828
// %tobool = icmp eq %shr, 0
1971
1829
// %inc = add nsw %i.0, 1
1972
1830
// br i1 %tobool
1973
- size_t IdiomCanonicalSize = 6 ;
1974
- if (!isProfitableToInsertFFS (IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
1975
- return false ;
1976
-
1977
- transformLoopToCountable (IntrinID, PH, CntInst, CntPhi, InitX, DefX,
1978
- DefX->getDebugLoc (), ZeroCheck,
1979
- IsCntPhiUsedOutsideLoop);
1980
- return true ;
1981
- }
1982
-
1983
- // / Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
1984
- // / to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
1985
- // / trip count returns true; otherwise, returns false.
1986
- bool LoopIdiomRecognize::recognizeAndInsertFFS () {
1987
- // Give up if the loop has multiple blocks or multiple backedges.
1988
- if (CurLoop->getNumBackEdges () != 1 || CurLoop->getNumBlocks () != 1 )
1989
- return false ;
1990
-
1991
- Intrinsic::ID IntrinID;
1992
- Value *InitX;
1993
- Instruction *DefX = nullptr ;
1994
- PHINode *CntPhi = nullptr ;
1995
- Instruction *CntInst = nullptr ;
1996
-
1997
- if (!detectShiftUntilZeroIdiom (CurLoop, *DL, IntrinID, InitX, CntInst, CntPhi,
1998
- DefX))
1999
- return false ;
2000
1831
2001
- return insertFFSIfProfitable (IntrinID, InitX, DefX, CntPhi, CntInst);
2002
- }
2003
-
2004
- bool LoopIdiomRecognize::recognizeShiftUntilLessThan () {
2005
- // Give up if the loop has multiple blocks or multiple backedges.
2006
- if (CurLoop->getNumBackEdges () != 1 || CurLoop->getNumBlocks () != 1 )
2007
- return false ;
2008
-
2009
- Intrinsic::ID IntrinID;
2010
- Value *InitX;
2011
- Instruction *DefX = nullptr ;
2012
- PHINode *CntPhi = nullptr ;
2013
- Instruction *CntInst = nullptr ;
2014
-
2015
- uint64_t LoopThreshold;
2016
- if (!detectShiftUntilLessThanIdiom (CurLoop, *DL, IntrinID, InitX, CntInst,
2017
- CntPhi, DefX, LoopThreshold))
2018
- return false ;
2019
-
2020
- if (LoopThreshold == 2 ) {
2021
- // Treat as regular FFS.
2022
- return insertFFSIfProfitable (IntrinID, InitX, DefX, CntPhi, CntInst);
2023
- }
2024
-
2025
- // Look for Floor Log2 Idiom.
2026
- if (LoopThreshold != 4 )
2027
- return false ;
2028
-
2029
- // Abort if CntPhi is used outside of the loop.
2030
- for (User *U : CntPhi->users ())
2031
- if (!CurLoop->contains (cast<Instruction>(U)))
2032
- return false ;
2033
-
2034
- // It is safe to assume Preheader exist as it was checked in
2035
- // parent function RunOnLoop.
2036
- BasicBlock *PH = CurLoop->getLoopPreheader ();
2037
- auto *PreCondBB = PH->getSinglePredecessor ();
2038
- if (!PreCondBB)
2039
- return false ;
2040
- auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator ());
2041
- if (!PreCondBI)
2042
- return false ;
2043
-
2044
- uint64_t PreLoopThreshold;
2045
- if (matchShiftULTCondition (PreCondBI, PH, PreLoopThreshold) != InitX ||
2046
- PreLoopThreshold != 2 )
2047
- return false ;
1832
+ const Value *Args[] = {InitX,
1833
+ ConstantInt::getBool (InitX->getContext (), ZeroCheck)};
2048
1834
2049
- bool ZeroCheck = true ;
1835
+ // @llvm.dbg doesn't count as they have no semantic effect.
1836
+ auto InstWithoutDebugIt = CurLoop->getHeader ()->instructionsWithoutDebug ();
1837
+ uint32_t HeaderSize =
1838
+ std::distance (InstWithoutDebugIt.begin (), InstWithoutDebugIt.end ());
2050
1839
2051
- // the loop has only 6 instructions:
2052
- // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
2053
- // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
2054
- // %shr = ashr %n.addr.0, 1
2055
- // %tobool = icmp ult %n.addr.0, C
2056
- // %inc = add nsw %i.0, 1
2057
- // br i1 %tobool
2058
- size_t IdiomCanonicalSize = 6 ;
2059
- if (!isProfitableToInsertFFS (IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
1840
+ IntrinsicCostAttributes Attrs (IntrinID, InitX->getType (), Args);
1841
+ InstructionCost Cost =
1842
+ TTI->getIntrinsicInstrCost (Attrs, TargetTransformInfo::TCK_SizeAndLatency);
1843
+ if (HeaderSize != IdiomCanonicalSize &&
1844
+ Cost > TargetTransformInfo::TCC_Basic)
2060
1845
return false ;
2061
1846
2062
- // log2(x) = w − 1 − clz(x)
2063
1847
transformLoopToCountable (IntrinID, PH, CntInst, CntPhi, InitX, DefX,
2064
1848
DefX->getDebugLoc (), ZeroCheck,
2065
- /* IsCntPhiUsedOutsideLoop=*/ false ,
2066
- /* InsertSub=*/ true );
1849
+ IsCntPhiUsedOutsideLoop);
2067
1850
return true ;
2068
1851
}
2069
1852
@@ -2178,7 +1961,7 @@ static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
2178
1961
void LoopIdiomRecognize::transformLoopToCountable (
2179
1962
Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
2180
1963
PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
2181
- bool ZeroCheck, bool IsCntPhiUsedOutsideLoop, bool InsertSub ) {
1964
+ bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
2182
1965
BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator ());
2183
1966
2184
1967
// Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
@@ -2208,8 +1991,6 @@ void LoopIdiomRecognize::transformLoopToCountable(
2208
1991
Type *CountTy = Count->getType ();
2209
1992
Count = Builder.CreateSub (
2210
1993
ConstantInt::get (CountTy, CountTy->getIntegerBitWidth ()), Count);
2211
- if (InsertSub)
2212
- Count = Builder.CreateSub (Count, ConstantInt::get (CountTy, 1 ));
2213
1994
Value *NewCount = Count;
2214
1995
if (IsCntPhiUsedOutsideLoop)
2215
1996
Count = Builder.CreateAdd (Count, ConstantInt::get (CountTy, 1 ));
0 commit comments