Skip to content

Commit 9694844

Browse files
authored
Reland "[Transforms] LoopIdiomRecognize recognize strlen and wcslen llvm#108985" (llvm#132572)
Reland llvm#108985. Extend `LoopIdiomRecognize` to find loops of the form ```c base = str; while (*str) ++str; ``` and replace them with a call to the appropriate `strlen` or `wcslen` library function, which gives a small performance boost. The loop result is then recomputed from the call: ```c str = base + strlen(base); len = str - base; ```
1 parent ed022d9 commit 9694844

File tree

9 files changed

+1318
-5
lines changed

9 files changed

+1318
-5
lines changed

llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ struct DisableLIRP {
3434

3535
/// When true, Memcpy is disabled.
3636
static bool Memcpy;
37+
38+
/// When true, Strlen is disabled.
39+
static bool Strlen;
40+
41+
/// When true, Wcslen is disabled.
42+
static bool Wcslen;
3743
};
3844

3945
/// Performs Loop Idiom Recognize Pass.

llvm/include/llvm/Transforms/Utils/BuildLibCalls.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,12 @@ namespace llvm {
9393
Value *emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
9494
const TargetLibraryInfo *TLI);
9595

96+
/// Emit a call to the wcslen function to the builder, for the specified
97+
/// pointer. Ptr is required to be some pointer type, and the return value has
98+
/// 'size_t' type.
99+
Value *emitWcsLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
100+
const TargetLibraryInfo *TLI);
101+
96102
/// Emit a call to the strdup function to the builder, for the specified
97103
/// pointer. Ptr is required to be some pointer type, and the return value has
98104
/// 'i8*' type.

llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Lines changed: 293 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@
2020
//
2121
// TODO List:
2222
//
23-
// Future loop memory idioms to recognize:
24-
// memcmp, strlen, etc.
23+
// Future loop memory idioms to recognize: memcmp, etc.
2524
//
2625
// This could recognize common matrix multiplies and dot product idioms and
2726
// replace them with calls to BLAS (if linked in??).
@@ -33,6 +32,7 @@
3332
#include "llvm/ADT/ArrayRef.h"
3433
#include "llvm/ADT/DenseMap.h"
3534
#include "llvm/ADT/MapVector.h"
35+
#include "llvm/ADT/STLExtras.h"
3636
#include "llvm/ADT/SetVector.h"
3737
#include "llvm/ADT/SmallPtrSet.h"
3838
#include "llvm/ADT/SmallVector.h"
@@ -97,6 +97,7 @@ using namespace llvm;
9797
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
9898
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
9999
STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
100+
STATISTIC(NumStrLen, "Number of strlen's and wcslen's formed from loop loads");
100101
STATISTIC(
101102
NumShiftUntilBitTest,
102103
"Number of uncountable loops recognized as 'shift until bitttest' idiom");
@@ -126,6 +127,22 @@ static cl::opt<bool, true>
126127
cl::location(DisableLIRP::Memcpy), cl::init(false),
127128
cl::ReallyHidden);
128129

130+
bool DisableLIRP::Strlen;
131+
static cl::opt<bool, true>
132+
DisableLIRPStrlen("disable-loop-idiom-strlen",
133+
cl::desc("Proceed with loop idiom recognize pass, but do "
134+
"not convert loop(s) to strlen."),
135+
cl::location(DisableLIRP::Strlen), cl::init(false),
136+
cl::ReallyHidden);
137+
138+
bool DisableLIRP::Wcslen;
139+
static cl::opt<bool, true>
140+
EnableLIRPWcslen("disable-loop-idiom-wcslen",
141+
cl::desc("Proceed with loop idiom recognize pass, "
142+
"enable conversion of loop(s) to wcslen."),
143+
cl::location(DisableLIRP::Wcslen), cl::init(false),
144+
cl::ReallyHidden);
145+
129146
static cl::opt<bool> UseLIRCodeSizeHeurs(
130147
"use-lir-code-size-heurs",
131148
cl::desc("Use loop idiom recognition code size heuristics when compiling "
@@ -246,6 +263,7 @@ class LoopIdiomRecognize {
246263

247264
bool recognizeShiftUntilBitTest();
248265
bool recognizeShiftUntilZero();
266+
bool recognizeAndInsertStrLen();
249267

250268
/// @}
251269
};
@@ -295,7 +313,8 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
295313

296314
// Disable loop idiom recognition if the function's name is a common idiom.
297315
StringRef Name = L->getHeader()->getParent()->getName();
298-
if (Name == "memset" || Name == "memcpy")
316+
if (Name == "memset" || Name == "memcpy" || Name == "strlen" ||
317+
Name == "wcslen")
299318
return false;
300319

301320
// Determine if code size heuristics need to be applied.
@@ -1494,7 +1513,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
14941513

14951514
return recognizePopcount() || recognizeAndInsertFFS() ||
14961515
recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
1497-
recognizeShiftUntilLessThan();
1516+
recognizeShiftUntilLessThan() || recognizeAndInsertStrLen();
14981517
}
14991518

15001519
/// Check if the given conditional branch is based on the comparison between
@@ -1512,7 +1531,7 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
15121531
if (!Cond)
15131532
return nullptr;
15141533

1515-
ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
1534+
auto *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
15161535
if (!CmpZero || !CmpZero->isZero())
15171536
return nullptr;
15181537

@@ -1529,6 +1548,275 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
15291548
return nullptr;
15301549
}
15311550

1551+
namespace {
1552+
1553+
class StrlenVerifier {
1554+
public:
1555+
explicit StrlenVerifier(const Loop *CurLoop, ScalarEvolution *SE,
1556+
const TargetLibraryInfo *TLI)
1557+
: CurLoop(CurLoop), SE(SE), TLI(TLI) {}
1558+
1559+
bool isValidStrlenIdiom() {
1560+
// Give up if the loop has multiple blocks, multiple backedges, or
1561+
// multiple exit blocks
1562+
if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1 ||
1563+
!CurLoop->getUniqueExitBlock())
1564+
return false;
1565+
1566+
// It should have a preheader and a branch instruction.
1567+
BasicBlock *Preheader = CurLoop->getLoopPreheader();
1568+
if (!Preheader)
1569+
return false;
1570+
1571+
BranchInst *EntryBI = dyn_cast<BranchInst>(Preheader->getTerminator());
1572+
if (!EntryBI)
1573+
return false;
1574+
1575+
// The loop exit must be conditioned on an icmp with 0 the null terminator.
1576+
// The icmp operand has to be a load on some SSA reg that increments
1577+
// by 1 in the loop.
1578+
BasicBlock *LoopBody = *CurLoop->block_begin();
1579+
1580+
// Skip if the body is too big as it most likely is not a strlen idiom.
1581+
if (!LoopBody || LoopBody->size() >= 15)
1582+
return false;
1583+
1584+
BranchInst *LoopTerm = dyn_cast<BranchInst>(LoopBody->getTerminator());
1585+
Value *LoopCond = matchCondition(LoopTerm, LoopBody);
1586+
if (!LoopCond)
1587+
return false;
1588+
1589+
LoadInst *LoopLoad = dyn_cast<LoadInst>(LoopCond);
1590+
if (!LoopLoad || LoopLoad->getPointerAddressSpace() != 0)
1591+
return false;
1592+
1593+
OperandType = LoopLoad->getType();
1594+
if (!OperandType || !OperandType->isIntegerTy())
1595+
return false;
1596+
1597+
// See if the pointer expression is an AddRec with constant step a of form
1598+
// ({n,+,a}) where a is the width of the char type.
1599+
Value *IncPtr = LoopLoad->getPointerOperand();
1600+
const SCEVAddRecExpr *LoadEv =
1601+
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IncPtr));
1602+
if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
1603+
return false;
1604+
LoadBaseEv = LoadEv->getStart();
1605+
1606+
LLVM_DEBUG(dbgs() << "pointer load scev: " << *LoadEv << "\n");
1607+
1608+
const SCEVConstant *Step =
1609+
dyn_cast<SCEVConstant>(LoadEv->getStepRecurrence(*SE));
1610+
if (!Step)
1611+
return false;
1612+
1613+
unsigned StepSize = 0;
1614+
StepSizeCI = dyn_cast<ConstantInt>(Step->getValue());
1615+
if (!StepSizeCI)
1616+
return false;
1617+
StepSize = StepSizeCI->getZExtValue();
1618+
1619+
// Verify that StepSize is consistent with platform char width.
1620+
OpWidth = OperandType->getIntegerBitWidth();
1621+
unsigned WcharSize = TLI->getWCharSize(*LoopLoad->getModule());
1622+
if (OpWidth != StepSize * 8)
1623+
return false;
1624+
if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32)
1625+
return false;
1626+
if (OpWidth >= 16)
1627+
if (OpWidth != WcharSize * 8)
1628+
return false;
1629+
1630+
// Scan every instruction in the loop to ensure there are no side effects.
1631+
for (Instruction &I : *LoopBody)
1632+
if (I.mayHaveSideEffects())
1633+
return false;
1634+
1635+
BasicBlock *LoopExitBB = CurLoop->getExitBlock();
1636+
if (!LoopExitBB)
1637+
return false;
1638+
1639+
for (PHINode &PN : LoopExitBB->phis()) {
1640+
if (!SE->isSCEVable(PN.getType()))
1641+
return false;
1642+
1643+
const SCEV *Ev = SE->getSCEV(&PN);
1644+
if (!Ev)
1645+
return false;
1646+
1647+
LLVM_DEBUG(dbgs() << "loop exit phi scev: " << *Ev << "\n");
1648+
1649+
// Since we verified that the loop trip count will be a valid strlen
1650+
// idiom, we can expand all lcssa phi with {n,+,1} as (n + strlen) and use
1651+
// SCEVExpander materialize the loop output.
1652+
const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
1653+
if (!AddRecEv || !AddRecEv->isAffine())
1654+
return false;
1655+
1656+
// We only want RecAddExpr with recurrence step that is constant. This
1657+
// is good enough for all the idioms we want to recognize. Later we expand
1658+
// and materialize the recurrence as {base,+,a} -> (base + a * strlen)
1659+
if (!dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence(*SE)))
1660+
return false;
1661+
}
1662+
1663+
return true;
1664+
}
1665+
1666+
public:
1667+
const Loop *CurLoop;
1668+
ScalarEvolution *SE;
1669+
const TargetLibraryInfo *TLI;
1670+
1671+
unsigned OpWidth;
1672+
ConstantInt *StepSizeCI;
1673+
const SCEV *LoadBaseEv;
1674+
Type *OperandType;
1675+
};
1676+
1677+
} // namespace
1678+
1679+
/// The Strlen Idiom we are trying to detect has the following structure
1680+
///
1681+
/// preheader:
1682+
/// ...
1683+
/// br label %body, ...
1684+
///
1685+
/// body:
1686+
/// ... ; %0 is incremented by a gep
1687+
/// %1 = load i8, ptr %0, align 1
1688+
/// %2 = icmp eq i8 %1, 0
1689+
/// br i1 %2, label %exit, label %body
1690+
///
1691+
/// exit:
1692+
/// %lcssa = phi [%0, %body], ...
1693+
///
1694+
/// We expect the strlen idiom to have a load of a character type that
1695+
/// is compared against '\0', and such load pointer operand must have scev
1696+
/// expression of the form {%str,+,c} where c is a ConstantInt of the
1697+
/// appropriate character width for the idiom, and %str is the base of the string.
1698+
/// And, that all lcssa phis have the form {...,+,n} where n is a constant,
1699+
///
1700+
/// When transforming the output of the strlen idiom, the lcssa phis are
1701+
/// expanded using SCEVExpander as {base scev,+,a} -> (base scev + a * strlen)
1702+
/// and all subsequent uses are replaced. For example,
1703+
///
1704+
/// \code{.c}
1705+
/// const char* base = str;
1706+
/// while (*str != '\0')
1707+
/// ++str;
1708+
/// size_t result = str - base;
1709+
/// \endcode
1710+
///
1711+
/// will be transformed as follows: The idiom will be replaced by a strlen
1712+
/// computation to compute the address of the null terminator of the string.
1713+
///
1714+
/// \code{.c}
1715+
/// const char* base = str;
1716+
/// const char* end = base + strlen(str);
1717+
/// size_t result = end - base;
1718+
/// \endcode
1719+
///
1720+
/// In the case we index by an induction variable, as long as the induction
1721+
/// variable has a constant int increment, we can replace all such indvars
1722+
/// with the closed form computation of strlen
1723+
///
1724+
/// \code{.c}
1725+
/// size_t i = 0;
1726+
/// while (str[i] != '\0')
1727+
/// ++i;
1728+
/// size_t result = i;
1729+
/// \endcode
1730+
///
1731+
/// Will be replaced by
1732+
///
1733+
/// \code{.c}
1734+
/// size_t i = 0 + strlen(str);
1735+
/// size_t result = i;
1736+
/// \endcode
1737+
///
1738+
bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
1739+
if (DisableLIRP::All)
1740+
return false;
1741+
1742+
StrlenVerifier Verifier(CurLoop, SE, TLI);
1743+
1744+
if (!Verifier.isValidStrlenIdiom())
1745+
return false;
1746+
1747+
BasicBlock *Preheader = CurLoop->getLoopPreheader();
1748+
BasicBlock *LoopExitBB = CurLoop->getExitBlock();
1749+
1750+
if (Verifier.OpWidth == 8) {
1751+
if (DisableLIRP::Strlen)
1752+
return false;
1753+
if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_strlen))
1754+
return false;
1755+
} else {
1756+
if (DisableLIRP::Wcslen)
1757+
return false;
1758+
if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_wcslen))
1759+
return false;
1760+
}
1761+
1762+
IRBuilder<> Builder(Preheader->getTerminator());
1763+
SCEVExpander Expander(*SE, Preheader->getModule()->getDataLayout(),
1764+
"strlen_idiom");
1765+
Value *MaterialzedBase = Expander.expandCodeFor(
1766+
Verifier.LoadBaseEv, Verifier.LoadBaseEv->getType(),
1767+
Builder.GetInsertPoint());
1768+
1769+
Value *StrLenFunc = nullptr;
1770+
if (Verifier.OpWidth == 8) {
1771+
StrLenFunc = emitStrLen(MaterialzedBase, Builder, *DL, TLI);
1772+
} else {
1773+
StrLenFunc = emitWcsLen(MaterialzedBase, Builder, *DL, TLI);
1774+
}
1775+
assert(StrLenFunc && "Failed to emit strlen function.");
1776+
1777+
const SCEV *StrlenEv = SE->getSCEV(StrLenFunc);
1778+
SmallVector<PHINode *, 4> Cleanup;
1779+
for (PHINode &PN : LoopExitBB->phis()) {
1780+
// We can now materialize the loop output as all phi have scev {base,+,a}.
1781+
// We expand the phi as:
1782+
// %strlen = call i64 @strlen(%str)
1783+
// %phi.new = base expression + step * %strlen
1784+
const SCEV *Ev = SE->getSCEV(&PN);
1785+
const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
1786+
const SCEVConstant *Step =
1787+
dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence(*SE));
1788+
const SCEV *Base = AddRecEv->getStart();
1789+
1790+
// It is safe to truncate to base since if base is narrower than size_t
1791+
// the equivalent user code will have to truncate anyways.
1792+
const SCEV *NewEv = SE->getAddExpr(
1793+
Base, SE->getMulExpr(Step, SE->getTruncateOrSignExtend(
1794+
StrlenEv, Base->getType())));
1795+
1796+
Value *MaterializedPHI = Expander.expandCodeFor(NewEv, NewEv->getType(),
1797+
Builder.GetInsertPoint());
1798+
Expander.clear();
1799+
PN.replaceAllUsesWith(MaterializedPHI);
1800+
Cleanup.push_back(&PN);
1801+
}
1802+
1803+
// All LCSSA Loop Phi are dead, the left over dead loop body can be cleaned
1804+
// up by later passes
1805+
for (PHINode *PN : Cleanup)
1806+
RecursivelyDeleteDeadPHINode(PN);
1807+
SE->forgetLoop(CurLoop);
1808+
1809+
++NumStrLen;
1810+
LLVM_DEBUG(dbgs() << " Formed strlen idiom: " << *StrLenFunc << "\n");
1811+
ORE.emit([&]() {
1812+
return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertStrLen",
1813+
CurLoop->getStartLoc(), Preheader)
1814+
<< "Transformed " << StrLenFunc->getName() << " loop idiom";
1815+
});
1816+
1817+
return true;
1818+
}
1819+
15321820
/// Check if the given conditional branch is based on an unsigned less-than
15331821
/// comparison between a variable and a constant, and if the comparison is false
15341822
/// the control yields to the loop entry. If the branch matches the behaviour,

0 commit comments

Comments
 (0)