20
20
//
21
21
// TODO List:
22
22
//
23
- // Future loop memory idioms to recognize:
24
- // memcmp, strlen, etc.
23
+ // Future loop memory idioms to recognize: memcmp, etc.
25
24
//
26
25
// This could recognize common matrix multiplies and dot product idioms and
27
26
// replace them with calls to BLAS (if linked in??).
33
32
#include " llvm/ADT/ArrayRef.h"
34
33
#include " llvm/ADT/DenseMap.h"
35
34
#include " llvm/ADT/MapVector.h"
35
+ #include " llvm/ADT/STLExtras.h"
36
36
#include " llvm/ADT/SetVector.h"
37
37
#include " llvm/ADT/SmallPtrSet.h"
38
38
#include " llvm/ADT/SmallVector.h"
@@ -97,6 +97,7 @@ using namespace llvm;
97
97
STATISTIC (NumMemSet, " Number of memset's formed from loop stores" );
98
98
STATISTIC (NumMemCpy, " Number of memcpy's formed from loop load+stores" );
99
99
STATISTIC (NumMemMove, " Number of memmove's formed from loop load+stores" );
100
+ STATISTIC (NumStrLen, " Number of strlen's and wcslen's formed from loop loads" );
100
101
STATISTIC (
101
102
NumShiftUntilBitTest,
102
103
" Number of uncountable loops recognized as 'shift until bitttest' idiom" );
@@ -126,6 +127,22 @@ static cl::opt<bool, true>
126
127
cl::location(DisableLIRP::Memcpy), cl::init(false ),
127
128
cl::ReallyHidden);
128
129
130
+ bool DisableLIRP::Strlen;
131
+ static cl::opt<bool , true >
132
+ DisableLIRPStrlen (" disable-loop-idiom-strlen" ,
133
+ cl::desc (" Proceed with loop idiom recognize pass, but do "
134
+ " not convert loop(s) to strlen." ),
135
+ cl::location(DisableLIRP::Strlen), cl::init(false ),
136
+ cl::ReallyHidden);
137
+
138
+ bool DisableLIRP::Wcslen;
139
+ static cl::opt<bool , true >
140
+ EnableLIRPWcslen (" disable-loop-idiom-wcslen" ,
141
+ cl::desc (" Proceed with loop idiom recognize pass, "
142
+ " enable conversion of loop(s) to wcslen." ),
143
+ cl::location(DisableLIRP::Wcslen), cl::init(false ),
144
+ cl::ReallyHidden);
145
+
129
146
static cl::opt<bool > UseLIRCodeSizeHeurs (
130
147
" use-lir-code-size-heurs" ,
131
148
cl::desc (" Use loop idiom recognition code size heuristics when compiling "
@@ -246,6 +263,7 @@ class LoopIdiomRecognize {
246
263
247
264
bool recognizeShiftUntilBitTest ();
248
265
bool recognizeShiftUntilZero ();
266
+ bool recognizeAndInsertStrLen ();
249
267
250
268
// / @}
251
269
};
@@ -295,7 +313,8 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
295
313
296
314
// Disable loop idiom recognition if the function's name is a common idiom.
297
315
StringRef Name = L->getHeader ()->getParent ()->getName ();
298
- if (Name == " memset" || Name == " memcpy" )
316
+ if (Name == " memset" || Name == " memcpy" || Name == " strlen" ||
317
+ Name == " wcslen" )
299
318
return false ;
300
319
301
320
// Determine if code size heuristics need to be applied.
@@ -1494,7 +1513,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
1494
1513
1495
1514
return recognizePopcount () || recognizeAndInsertFFS () ||
1496
1515
recognizeShiftUntilBitTest () || recognizeShiftUntilZero () ||
1497
- recognizeShiftUntilLessThan ();
1516
+ recognizeShiftUntilLessThan () || recognizeAndInsertStrLen () ;
1498
1517
}
1499
1518
1500
1519
// / Check if the given conditional branch is based on the comparison between
@@ -1512,7 +1531,7 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
1512
1531
if (!Cond)
1513
1532
return nullptr ;
1514
1533
1515
- ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand (1 ));
1534
+ auto *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand (1 ));
1516
1535
if (!CmpZero || !CmpZero->isZero ())
1517
1536
return nullptr ;
1518
1537
@@ -1529,6 +1548,275 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
1529
1548
return nullptr ;
1530
1549
}
1531
1550
1551
+ namespace {
1552
+
1553
+ class StrlenVerifier {
1554
+ public:
1555
+ explicit StrlenVerifier (const Loop *CurLoop, ScalarEvolution *SE,
1556
+ const TargetLibraryInfo *TLI)
1557
+ : CurLoop(CurLoop), SE(SE), TLI(TLI) {}
1558
+
1559
+ bool isValidStrlenIdiom () {
1560
+ // Give up if the loop has multiple blocks, multiple backedges, or
1561
+ // multiple exit blocks
1562
+ if (CurLoop->getNumBackEdges () != 1 || CurLoop->getNumBlocks () != 1 ||
1563
+ !CurLoop->getUniqueExitBlock ())
1564
+ return false ;
1565
+
1566
+ // It should have a preheader and a branch instruction.
1567
+ BasicBlock *Preheader = CurLoop->getLoopPreheader ();
1568
+ if (!Preheader)
1569
+ return false ;
1570
+
1571
+ BranchInst *EntryBI = dyn_cast<BranchInst>(Preheader->getTerminator ());
1572
+ if (!EntryBI)
1573
+ return false ;
1574
+
1575
+ // The loop exit must be conditioned on an icmp with 0 the null terminator.
1576
+ // The icmp operand has to be a load on some SSA reg that increments
1577
+ // by 1 in the loop.
1578
+ BasicBlock *LoopBody = *CurLoop->block_begin ();
1579
+
1580
+ // Skip if the body is too big as it most likely is not a strlen idiom.
1581
+ if (!LoopBody || LoopBody->size () >= 15 )
1582
+ return false ;
1583
+
1584
+ BranchInst *LoopTerm = dyn_cast<BranchInst>(LoopBody->getTerminator ());
1585
+ Value *LoopCond = matchCondition (LoopTerm, LoopBody);
1586
+ if (!LoopCond)
1587
+ return false ;
1588
+
1589
+ LoadInst *LoopLoad = dyn_cast<LoadInst>(LoopCond);
1590
+ if (!LoopLoad || LoopLoad->getPointerAddressSpace () != 0 )
1591
+ return false ;
1592
+
1593
+ OperandType = LoopLoad->getType ();
1594
+ if (!OperandType || !OperandType->isIntegerTy ())
1595
+ return false ;
1596
+
1597
+ // See if the pointer expression is an AddRec with constant step a of form
1598
+ // ({n,+,a}) where a is the width of the char type.
1599
+ Value *IncPtr = LoopLoad->getPointerOperand ();
1600
+ const SCEVAddRecExpr *LoadEv =
1601
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV (IncPtr));
1602
+ if (!LoadEv || LoadEv->getLoop () != CurLoop || !LoadEv->isAffine ())
1603
+ return false ;
1604
+ LoadBaseEv = LoadEv->getStart ();
1605
+
1606
+ LLVM_DEBUG (dbgs () << " pointer load scev: " << *LoadEv << " \n " );
1607
+
1608
+ const SCEVConstant *Step =
1609
+ dyn_cast<SCEVConstant>(LoadEv->getStepRecurrence (*SE));
1610
+ if (!Step)
1611
+ return false ;
1612
+
1613
+ unsigned StepSize = 0 ;
1614
+ StepSizeCI = dyn_cast<ConstantInt>(Step->getValue ());
1615
+ if (!StepSizeCI)
1616
+ return false ;
1617
+ StepSize = StepSizeCI->getZExtValue ();
1618
+
1619
+ // Verify that StepSize is consistent with platform char width.
1620
+ OpWidth = OperandType->getIntegerBitWidth ();
1621
+ unsigned WcharSize = TLI->getWCharSize (*LoopLoad->getModule ());
1622
+ if (OpWidth != StepSize * 8 )
1623
+ return false ;
1624
+ if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32 )
1625
+ return false ;
1626
+ if (OpWidth >= 16 )
1627
+ if (OpWidth != WcharSize * 8 )
1628
+ return false ;
1629
+
1630
+ // Scan every instruction in the loop to ensure there are no side effects.
1631
+ for (Instruction &I : *LoopBody)
1632
+ if (I.mayHaveSideEffects ())
1633
+ return false ;
1634
+
1635
+ BasicBlock *LoopExitBB = CurLoop->getExitBlock ();
1636
+ if (!LoopExitBB)
1637
+ return false ;
1638
+
1639
+ for (PHINode &PN : LoopExitBB->phis ()) {
1640
+ if (!SE->isSCEVable (PN.getType ()))
1641
+ return false ;
1642
+
1643
+ const SCEV *Ev = SE->getSCEV (&PN);
1644
+ if (!Ev)
1645
+ return false ;
1646
+
1647
+ LLVM_DEBUG (dbgs () << " loop exit phi scev: " << *Ev << " \n " );
1648
+
1649
+ // Since we verified that the loop trip count will be a valid strlen
1650
+ // idiom, we can expand all lcssa phi with {n,+,1} as (n + strlen) and use
1651
+ // SCEVExpander materialize the loop output.
1652
+ const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
1653
+ if (!AddRecEv || !AddRecEv->isAffine ())
1654
+ return false ;
1655
+
1656
+ // We only want RecAddExpr with recurrence step that is constant. This
1657
+ // is good enough for all the idioms we want to recognize. Later we expand
1658
+ // and materialize the recurrence as {base,+,a} -> (base + a * strlen)
1659
+ if (!dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence (*SE)))
1660
+ return false ;
1661
+ }
1662
+
1663
+ return true ;
1664
+ }
1665
+
1666
+ public:
1667
+ const Loop *CurLoop;
1668
+ ScalarEvolution *SE;
1669
+ const TargetLibraryInfo *TLI;
1670
+
1671
+ unsigned OpWidth;
1672
+ ConstantInt *StepSizeCI;
1673
+ const SCEV *LoadBaseEv;
1674
+ Type *OperandType;
1675
+ };
1676
+
1677
+ } // namespace
1678
+
1679
+ // / The Strlen Idiom we are trying to detect has the following structure
1680
+ // /
1681
+ // / preheader:
1682
+ // / ...
1683
+ // / br label %body, ...
1684
+ // /
1685
+ // / body:
1686
+ // / ... ; %0 is incremented by a gep
1687
+ // / %1 = load i8, ptr %0, align 1
1688
+ // / %2 = icmp eq i8 %1, 0
1689
+ // / br i1 %2, label %exit, label %body
1690
+ // /
1691
+ // / exit:
1692
+ // / %lcssa = phi [%0, %body], ...
1693
+ // /
1694
+ // / We expect the strlen idiom to have a load of a character type that
1695
+ // / is compared against '\0', and such load pointer operand must have scev
1696
+ // / expression of the form {%str,+,c} where c is a ConstantInt of the
1697
+ // / appropiate character width for the idiom, and %str is the base of the string
1698
+ // / And, that all lcssa phis have the form {...,+,n} where n is a constant,
1699
+ // /
1700
+ // / When transforming the output of the strlen idiom, the lccsa phi are
1701
+ // / expanded using SCEVExpander as {base scev,+,a} -> (base scev + a * strlen)
1702
+ // / and all subsequent uses are replaced. For example,
1703
+ // /
1704
+ // / \code{.c}
1705
+ // / const char* base = str;
1706
+ // / while (*str != '\0')
1707
+ // / ++str;
1708
+ // / size_t result = str - base;
1709
+ // / \endcode
1710
+ // /
1711
+ // / will be transformed as follows: The idiom will be replaced by a strlen
1712
+ // / computation to compute the address of the null terminator of the string.
1713
+ // /
1714
+ // / \code{.c}
1715
+ // / const char* base = str;
1716
+ // / const char* end = base + strlen(str);
1717
+ // / size_t result = end - base;
1718
+ // / \endcode
1719
+ // /
1720
+ // / In the case we index by an induction variable, as long as the induction
1721
+ // / variable has a constant int increment, we can replace all such indvars
1722
+ // / with the closed form computation of strlen
1723
+ // /
1724
+ // / \code{.c}
1725
+ // / size_t i = 0;
1726
+ // / while (str[i] != '\0')
1727
+ // / ++i;
1728
+ // / size_t result = i;
1729
+ // / \endcode
1730
+ // /
1731
+ // / Will be replaced by
1732
+ // /
1733
+ // / \code{.c}
1734
+ // / size_t i = 0 + strlen(str);
1735
+ // / size_t result = i;
1736
+ // / \endcode
1737
+ // /
1738
+ bool LoopIdiomRecognize::recognizeAndInsertStrLen () {
1739
+ if (DisableLIRP::All)
1740
+ return false ;
1741
+
1742
+ StrlenVerifier Verifier (CurLoop, SE, TLI);
1743
+
1744
+ if (!Verifier.isValidStrlenIdiom ())
1745
+ return false ;
1746
+
1747
+ BasicBlock *Preheader = CurLoop->getLoopPreheader ();
1748
+ BasicBlock *LoopExitBB = CurLoop->getExitBlock ();
1749
+
1750
+ if (Verifier.OpWidth == 8 ) {
1751
+ if (DisableLIRP::Strlen)
1752
+ return false ;
1753
+ if (!isLibFuncEmittable (Preheader->getModule (), TLI, LibFunc_strlen))
1754
+ return false ;
1755
+ } else {
1756
+ if (DisableLIRP::Wcslen)
1757
+ return false ;
1758
+ if (!isLibFuncEmittable (Preheader->getModule (), TLI, LibFunc_wcslen))
1759
+ return false ;
1760
+ }
1761
+
1762
+ IRBuilder<> Builder (Preheader->getTerminator ());
1763
+ SCEVExpander Expander (*SE, Preheader->getModule ()->getDataLayout (),
1764
+ " strlen_idiom" );
1765
+ Value *MaterialzedBase = Expander.expandCodeFor (
1766
+ Verifier.LoadBaseEv , Verifier.LoadBaseEv ->getType (),
1767
+ Builder.GetInsertPoint ());
1768
+
1769
+ Value *StrLenFunc = nullptr ;
1770
+ if (Verifier.OpWidth == 8 ) {
1771
+ StrLenFunc = emitStrLen (MaterialzedBase, Builder, *DL, TLI);
1772
+ } else {
1773
+ StrLenFunc = emitWcsLen (MaterialzedBase, Builder, *DL, TLI);
1774
+ }
1775
+ assert (StrLenFunc && " Failed to emit strlen function." );
1776
+
1777
+ const SCEV *StrlenEv = SE->getSCEV (StrLenFunc);
1778
+ SmallVector<PHINode *, 4 > Cleanup;
1779
+ for (PHINode &PN : LoopExitBB->phis ()) {
1780
+ // We can now materialize the loop output as all phi have scev {base,+,a}.
1781
+ // We expand the phi as:
1782
+ // %strlen = call i64 @strlen(%str)
1783
+ // %phi.new = base expression + step * %strlen
1784
+ const SCEV *Ev = SE->getSCEV (&PN);
1785
+ const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
1786
+ const SCEVConstant *Step =
1787
+ dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence (*SE));
1788
+ const SCEV *Base = AddRecEv->getStart ();
1789
+
1790
+ // It is safe to truncate to base since if base is narrower than size_t
1791
+ // the equivalent user code will have to truncate anyways.
1792
+ const SCEV *NewEv = SE->getAddExpr (
1793
+ Base, SE->getMulExpr (Step, SE->getTruncateOrSignExtend (
1794
+ StrlenEv, Base->getType ())));
1795
+
1796
+ Value *MaterializedPHI = Expander.expandCodeFor (NewEv, NewEv->getType (),
1797
+ Builder.GetInsertPoint ());
1798
+ Expander.clear ();
1799
+ PN.replaceAllUsesWith (MaterializedPHI);
1800
+ Cleanup.push_back (&PN);
1801
+ }
1802
+
1803
+ // All LCSSA Loop Phi are dead, the left over dead loop body can be cleaned
1804
+ // up by later passes
1805
+ for (PHINode *PN : Cleanup)
1806
+ RecursivelyDeleteDeadPHINode (PN);
1807
+ SE->forgetLoop (CurLoop);
1808
+
1809
+ ++NumStrLen;
1810
+ LLVM_DEBUG (dbgs () << " Formed strlen idiom: " << *StrLenFunc << " \n " );
1811
+ ORE.emit ([&]() {
1812
+ return OptimizationRemark (DEBUG_TYPE, " recognizeAndInsertStrLen" ,
1813
+ CurLoop->getStartLoc (), Preheader)
1814
+ << " Transformed " << StrLenFunc->getName () << " loop idiom" ;
1815
+ });
1816
+
1817
+ return true ;
1818
+ }
1819
+
1532
1820
// / Check if the given conditional branch is based on an unsigned less-than
1533
1821
// / comparison between a variable and a constant, and if the comparison is false
1534
1822
// / the control yields to the loop entry. If the branch matches the behaviour,
0 commit comments