20
20
//
21
21
// TODO List:
22
22
//
23
- // Future loop memory idioms to recognize: memcmp, etc.
23
+ // Future loop memory idioms to recognize:
24
+ // memcmp, strlen, etc.
24
25
//
25
26
// This could recognize common matrix multiplies and dot product idioms and
26
27
// replace them with calls to BLAS (if linked in??).
32
33
#include " llvm/ADT/ArrayRef.h"
33
34
#include " llvm/ADT/DenseMap.h"
34
35
#include " llvm/ADT/MapVector.h"
35
- #include " llvm/ADT/STLExtras.h"
36
36
#include " llvm/ADT/SetVector.h"
37
37
#include " llvm/ADT/SmallPtrSet.h"
38
38
#include " llvm/ADT/SmallVector.h"
@@ -97,7 +97,6 @@ using namespace llvm;
97
97
STATISTIC (NumMemSet, " Number of memset's formed from loop stores" );
98
98
STATISTIC (NumMemCpy, " Number of memcpy's formed from loop load+stores" );
99
99
STATISTIC (NumMemMove, " Number of memmove's formed from loop load+stores" );
100
- STATISTIC (NumStrLen, " Number of strlen's and wcslen's formed from loop loads" );
101
100
STATISTIC (
102
101
NumShiftUntilBitTest,
103
102
" Number of uncountable loops recognized as 'shift until bitttest' idiom" );
@@ -127,22 +126,6 @@ static cl::opt<bool, true>
127
126
cl::location(DisableLIRP::Memcpy), cl::init(false ),
128
127
cl::ReallyHidden);
129
128
130
- bool DisableLIRP::Strlen;
131
- static cl::opt<bool , true >
132
- DisableLIRPStrlen (" disable-loop-idiom-strlen" ,
133
- cl::desc (" Proceed with loop idiom recognize pass, but do "
134
- " not convert loop(s) to strlen." ),
135
- cl::location(DisableLIRP::Strlen), cl::init(false ),
136
- cl::ReallyHidden);
137
-
138
- bool DisableLIRP::Wcslen;
139
- static cl::opt<bool , true >
140
- EnableLIRPWcslen (" disable-loop-idiom-wcslen" ,
141
- cl::desc (" Proceed with loop idiom recognize pass, "
142
- " enable conversion of loop(s) to wcslen." ),
143
- cl::location(DisableLIRP::Wcslen), cl::init(false ),
144
- cl::ReallyHidden);
145
-
146
129
static cl::opt<bool > UseLIRCodeSizeHeurs (
147
130
" use-lir-code-size-heurs" ,
148
131
cl::desc (" Use loop idiom recognition code size heuristics when compiling "
@@ -263,7 +246,6 @@ class LoopIdiomRecognize {
263
246
264
247
bool recognizeShiftUntilBitTest ();
265
248
bool recognizeShiftUntilZero ();
266
- bool recognizeAndInsertStrLen ();
267
249
268
250
// / @}
269
251
};
@@ -1512,17 +1494,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
1512
1494
1513
1495
return recognizePopcount () || recognizeAndInsertFFS () ||
1514
1496
recognizeShiftUntilBitTest () || recognizeShiftUntilZero () ||
1515
- recognizeShiftUntilLessThan () || recognizeAndInsertStrLen ();
1516
- }
1517
-
1518
- // / Check if a Value is either a nullptr or a constant int zero
1519
- static bool isZeroConstant (const Value *Val) {
1520
- if (isa<ConstantPointerNull>(Val))
1521
- return true ;
1522
- const ConstantInt *CmpZero = dyn_cast<ConstantInt>(Val);
1523
- if (!CmpZero || !CmpZero->isZero ())
1524
- return false ;
1525
- return true ;
1497
+ recognizeShiftUntilLessThan ();
1526
1498
}
1527
1499
1528
1500
// / Check if the given conditional branch is based on the comparison between
@@ -1540,7 +1512,8 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
1540
1512
if (!Cond)
1541
1513
return nullptr ;
1542
1514
1543
- if (!isZeroConstant (Cond->getOperand (1 )))
1515
+ ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand (1 ));
1516
+ if (!CmpZero || !CmpZero->isZero ())
1544
1517
return nullptr ;
1545
1518
1546
1519
BasicBlock *TrueSucc = BI->getSuccessor (0 );
@@ -1556,279 +1529,6 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
1556
1529
return nullptr ;
1557
1530
}
1558
1531
1559
- namespace {
1560
-
1561
- class StrlenVerifier {
1562
- public:
1563
- explicit StrlenVerifier (const Loop *CurLoop, ScalarEvolution *SE,
1564
- const TargetLibraryInfo *TLI)
1565
- : CurLoop(CurLoop), SE(SE), TLI(TLI) {}
1566
-
1567
- bool isValidStrlenIdiom () {
1568
- // Give up if the loop has multiple blocks, multiple backedges, or
1569
- // multiple exit blocks
1570
- if (CurLoop->getNumBackEdges () != 1 || CurLoop->getNumBlocks () != 1 ||
1571
- !CurLoop->getUniqueExitBlock ())
1572
- return false ;
1573
-
1574
- // It should have a preheader and a branch instruction.
1575
- BasicBlock *Preheader = CurLoop->getLoopPreheader ();
1576
- if (!Preheader)
1577
- return false ;
1578
-
1579
- BranchInst *EntryBI = dyn_cast<BranchInst>(Preheader->getTerminator ());
1580
- if (!EntryBI)
1581
- return false ;
1582
-
1583
- // The loop exit must be conditioned on an icmp with 0 (the null terminator).
1584
- // The icmp operand has to be a load on some SSA reg that increments
1585
- // by 1 in the loop.
1586
- BasicBlock *LoopBody = *CurLoop->block_begin ();
1587
-
1588
- // Skip if the body is too big as it most likely is not a strlen idiom.
1589
- if (!LoopBody || LoopBody->size () >= 15 )
1590
- return false ;
1591
-
1592
- BranchInst *LoopTerm = dyn_cast<BranchInst>(LoopBody->getTerminator ());
1593
- Value *LoopCond = matchCondition (LoopTerm, LoopBody);
1594
- if (!LoopCond)
1595
- return false ;
1596
-
1597
- LoadInst *LoopLoad = dyn_cast<LoadInst>(LoopCond);
1598
- if (!LoopLoad || LoopLoad->getPointerAddressSpace () != 0 )
1599
- return false ;
1600
-
1601
- OperandType = LoopLoad->getType ();
1602
- if (!OperandType || !OperandType->isIntegerTy ())
1603
- return false ;
1604
-
1605
- // See if the pointer expression is an AddRec with constant step a of form
1606
- // ({n,+,a}) where a is the width of the char type.
1607
- Value *IncPtr = LoopLoad->getPointerOperand ();
1608
- const SCEVAddRecExpr *LoadEv =
1609
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV (IncPtr));
1610
- if (!LoadEv || LoadEv->getLoop () != CurLoop || !LoadEv->isAffine ())
1611
- return false ;
1612
- LoadBaseEv = LoadEv->getStart ();
1613
-
1614
- LLVM_DEBUG ({
1615
- dbgs () << " pointer load scev: " ;
1616
- LoadEv->print (outs ());
1617
- dbgs () << " \n " ;
1618
- });
1619
-
1620
- const SCEVConstant *Step =
1621
- dyn_cast<SCEVConstant>(LoadEv->getStepRecurrence (*SE));
1622
- if (!Step)
1623
- return false ;
1624
-
1625
- unsigned StepSize = 0 ;
1626
- StepSizeCI = dyn_cast<ConstantInt>(Step->getValue ());
1627
- if (!StepSizeCI)
1628
- return false ;
1629
- StepSize = StepSizeCI->getZExtValue ();
1630
-
1631
- // Verify that StepSize is consistent with platform char width.
1632
- OpWidth = OperandType->getIntegerBitWidth ();
1633
- unsigned WcharSize = TLI->getWCharSize (*LoopLoad->getModule ());
1634
- if (OpWidth != StepSize * 8 )
1635
- return false ;
1636
- if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32 )
1637
- return false ;
1638
- if (OpWidth >= 16 )
1639
- if (OpWidth != WcharSize * 8 )
1640
- return false ;
1641
-
1642
- // Scan every instruction in the loop to ensure there are no side effects.
1643
- for (Instruction &I : *LoopBody)
1644
- if (I.mayHaveSideEffects ())
1645
- return false ;
1646
-
1647
- BasicBlock *LoopExitBB = CurLoop->getExitBlock ();
1648
- if (!LoopExitBB)
1649
- return false ;
1650
-
1651
- for (PHINode &PN : LoopExitBB->phis ()) {
1652
- if (!SE->isSCEVable (PN.getType ()))
1653
- return false ;
1654
-
1655
- const SCEV *Ev = SE->getSCEV (&PN);
1656
- if (!Ev)
1657
- return false ;
1658
-
1659
- LLVM_DEBUG ({
1660
- dbgs () << " loop exit phi scev: " ;
1661
- Ev->print (dbgs ());
1662
- dbgs () << " \n " ;
1663
- });
1664
-
1665
- // Since we verified that the loop trip count will be a valid strlen
1666
- // idiom, we can expand all lcssa phi with {n,+,1} as (n + strlen) and use
1667
- // SCEVExpander to materialize the loop output.
1668
- const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
1669
- if (!AddRecEv || !AddRecEv->isAffine ())
1670
- return false ;
1671
-
1672
- // We only want an AddRecExpr whose recurrence step is constant. This
1673
- // is good enough for all the idioms we want to recognize. Later we expand
1674
- // and materialize the recurrence as {base,+,a} -> (base + a * strlen)
1675
- if (!dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence (*SE)))
1676
- return false ;
1677
- }
1678
-
1679
- return true ;
1680
- }
1681
-
1682
- public:
1683
- const Loop *CurLoop;
1684
- ScalarEvolution *SE;
1685
- const TargetLibraryInfo *TLI;
1686
-
1687
- unsigned OpWidth;
1688
- ConstantInt *StepSizeCI;
1689
- const SCEV *LoadBaseEv;
1690
- Type *OperandType;
1691
- };
1692
-
1693
- } // namespace
1694
-
1695
- // / The Strlen Idiom we are trying to detect has the following structure
1696
- // /
1697
- // / preheader:
1698
- // / ...
1699
- // / br label %body, ...
1700
- // /
1701
- // / body:
1702
- // / ... ; %0 is incremented by a gep
1703
- // / %1 = load i8, ptr %0, align 1
1704
- // / %2 = icmp eq i8 %1, 0
1705
- // / br i1 %2, label %exit, label %body
1706
- // /
1707
- // / exit:
1708
- // / %lcssa = phi [%0, %body], ...
1709
- // /
1710
- // / We expect the strlen idiom to have a load of a character type that
1711
- // / is compared against '\0', and such load pointer operand must have scev
1712
- // / expression of the form {%str,+,c} where c is a ConstantInt of the
1713
- // / appropriate character width for the idiom, and %str is the base of the string.
1714
- // / In addition, all lcssa phis must have the form {...,+,n} where n is a constant.
1715
- // /
1716
- // / When transforming the output of the strlen idiom, the lcssa phis are
1717
- // / expanded using SCEVExpander as {base scev,+,a} -> (base scev + a * strlen)
1718
- // / and all subsequent uses are replaced. For example,
1719
- // /
1720
- // / \code{.c}
1721
- // / const char* base = str;
1722
- // / while (*str != '\0')
1723
- // / ++str;
1724
- // / size_t result = str - base;
1725
- // / \endcode
1726
- // /
1727
- // / will be transformed as follows: The idiom will be replaced by a strlen
1728
- // / computation to compute the address of the null terminator of the string.
1729
- // /
1730
- // / \code{.c}
1731
- // / const char* base = str;
1732
- // / const char* end = base + strlen(str);
1733
- // / size_t result = end - base;
1734
- // / \endcode
1735
- // /
1736
- // / In the case we index by an induction variable, as long as the induction
1737
- // / variable has a constant int increment, we can replace all such indvars
1738
- // / with the closed form computation of strlen.
1739
- // /
1740
- // / \code{.c}
1741
- // / size_t i = 0;
1742
- // / while (str[i] != '\0')
1743
- // / ++i;
1744
- // / size_t result = i;
1745
- // / \endcode
1746
- // /
1747
- // / Will be replaced by
1748
- // /
1749
- // / \code{.c}
1750
- // / size_t i = 0 + strlen(str);
1751
- // / size_t result = i;
1752
- // / \endcode
1753
- // /
1754
- bool LoopIdiomRecognize::recognizeAndInsertStrLen () {
1755
- if (DisableLIRP::All)
1756
- return false ;
1757
-
1758
- StrlenVerifier Verifier (CurLoop, SE, TLI);
1759
-
1760
- if (!Verifier.isValidStrlenIdiom ())
1761
- return false ;
1762
-
1763
- BasicBlock *Preheader = CurLoop->getLoopPreheader ();
1764
- BasicBlock *LoopExitBB = CurLoop->getExitBlock ();
1765
-
1766
- IRBuilder<> Builder (Preheader->getTerminator ());
1767
- SCEVExpander Expander (*SE, Preheader->getModule ()->getDataLayout (),
1768
- " strlen_idiom" );
1769
- Value *MaterialzedBase = Expander.expandCodeFor (
1770
- Verifier.LoadBaseEv , Verifier.LoadBaseEv ->getType (),
1771
- Builder.GetInsertPoint ());
1772
-
1773
- Value *StrLenFunc = nullptr ;
1774
- if (Verifier.OpWidth == 8 ) {
1775
- if (DisableLIRP::Strlen)
1776
- return false ;
1777
- if (!isLibFuncEmittable (Preheader->getModule (), TLI, LibFunc_strlen))
1778
- return false ;
1779
- StrLenFunc = emitStrLen (MaterialzedBase, Builder, *DL, TLI);
1780
- } else {
1781
- if (DisableLIRP::Wcslen)
1782
- return false ;
1783
- if (!isLibFuncEmittable (Preheader->getModule (), TLI, LibFunc_wcslen))
1784
- return false ;
1785
- StrLenFunc = emitWcsLen (MaterialzedBase, Builder, *DL, TLI);
1786
- }
1787
- assert (StrLenFunc && " Failed to emit strlen function." );
1788
-
1789
- const SCEV *StrlenEv = SE->getSCEV (StrLenFunc);
1790
- SmallVector<PHINode *, 4 > Cleanup;
1791
- for (PHINode &PN : LoopExitBB->phis ()) {
1792
- // We can now materialize the loop output as all phi have scev {base,+,a}.
1793
- // We expand the phi as:
1794
- // %strlen = call i64 @strlen(%str)
1795
- // %phi.new = base expression + step * %strlen
1796
- const SCEV *Ev = SE->getSCEV (&PN);
1797
- const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
1798
- const SCEVConstant *Step =
1799
- dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence (*SE));
1800
- const SCEV *Base = AddRecEv->getStart ();
1801
-
1802
- // It is safe to truncate to base since if base is narrower than size_t
1803
- // the equivalent user code will have to truncate anyways.
1804
- const SCEV *NewEv = SE->getAddExpr (
1805
- Base, SE->getMulExpr (Step, SE->getTruncateOrSignExtend (
1806
- StrlenEv, Base->getType ())));
1807
-
1808
- Value *MaterializedPHI = Expander.expandCodeFor (NewEv, NewEv->getType (),
1809
- Builder.GetInsertPoint ());
1810
- Expander.clear ();
1811
- PN.replaceAllUsesWith (MaterializedPHI);
1812
- Cleanup.push_back (&PN);
1813
- }
1814
-
1815
- // All LCSSA loop phis are dead; the leftover dead loop body can be cleaned
1816
- // up by later passes
1817
- for (PHINode *PN : Cleanup)
1818
- RecursivelyDeleteDeadPHINode (PN);
1819
- SE->forgetLoop (CurLoop);
1820
-
1821
- ++NumStrLen;
1822
- LLVM_DEBUG (dbgs () << " Formed strlen idiom: " << *StrLenFunc << " \n " );
1823
- ORE.emit ([&]() {
1824
- return OptimizationRemark (DEBUG_TYPE, " recognizeAndInsertStrLen" ,
1825
- CurLoop->getStartLoc (), Preheader)
1826
- << " Transformed " << StrLenFunc->getName () << " loop idiom" ;
1827
- });
1828
-
1829
- return true ;
1830
- }
1831
-
1832
1532
// / Check if the given conditional branch is based on an unsigned less-than
1833
1533
// / comparison between a variable and a constant, and if the comparison is false
1834
1534
// / the control yields to the loop entry. If the branch matches the behaviour,
0 commit comments