Skip to content

Commit bf6357f

Browse files
mustarttMeinersbur
andauthored
[Transforms] LoopIdiomRecognize recognize strlen and wcslen (llvm#108985)
This PR continues the effort made in https://discourse.llvm.org/t/rfc-strlen-loop-idiom-recognition-folding/55848 and https://reviews.llvm.org/D83392 and https://reviews.llvm.org/D88460 to extend `LoopIdiomRecognize` to find loops of the form ```c base = str; while (*str) ++str; ``` and to transform this `strlen` loop idiom into the appropriate `strlen` or `wcslen` library call, which gives a small performance boost. After the replacement, the loop outputs are computed as ```c str = base + strlen(base) len = str - base ``` --------- Co-authored-by: Michael Kruse <[email protected]>
1 parent 8988914 commit bf6357f

File tree

7 files changed

+1201
-5
lines changed

7 files changed

+1201
-5
lines changed

llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ struct DisableLIRP {
3434

3535
/// When true, Memcpy is disabled.
3636
static bool Memcpy;
37+
38+
/// When true, Strlen is disabled.
39+
static bool Strlen;
40+
41+
/// When true, Wcslen is disabled.
42+
static bool Wcslen;
3743
};
3844

3945
/// Performs Loop Idiom Recognize Pass.

llvm/include/llvm/Transforms/Utils/BuildLibCalls.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,12 @@ namespace llvm {
9393
Value *emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
9494
const TargetLibraryInfo *TLI);
9595

96+
/// Emit a call to the wcslen function to the builder, for the specified
97+
/// pointer. Ptr is required to be some pointer type, and the return value has
98+
/// 'size_t' type.
99+
Value *emitWcsLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
100+
const TargetLibraryInfo *TLI);
101+
96102
/// Emit a call to the strdup function to the builder, for the specified
97103
/// pointer. Ptr is required to be some pointer type, and the return value has
98104
/// 'i8*' type.

llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Lines changed: 306 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@
2020
//
2121
// TODO List:
2222
//
23-
// Future loop memory idioms to recognize:
24-
// memcmp, strlen, etc.
23+
// Future loop memory idioms to recognize: memcmp, etc.
2524
//
2625
// This could recognize common matrix multiplies and dot product idioms and
2726
// replace them with calls to BLAS (if linked in??).
@@ -33,6 +32,7 @@
3332
#include "llvm/ADT/ArrayRef.h"
3433
#include "llvm/ADT/DenseMap.h"
3534
#include "llvm/ADT/MapVector.h"
35+
#include "llvm/ADT/STLExtras.h"
3636
#include "llvm/ADT/SetVector.h"
3737
#include "llvm/ADT/SmallPtrSet.h"
3838
#include "llvm/ADT/SmallVector.h"
@@ -97,6 +97,7 @@ using namespace llvm;
9797
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
9898
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
9999
STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
100+
STATISTIC(NumStrLen, "Number of strlen's and wcslen's formed from loop loads");
100101
STATISTIC(
101102
NumShiftUntilBitTest,
102103
"Number of uncountable loops recognized as 'shift until bitttest' idiom");
@@ -126,6 +127,26 @@ static cl::opt<bool, true>
126127
cl::location(DisableLIRP::Memcpy), cl::init(false),
127128
cl::ReallyHidden);
128129

130+
bool DisableLIRP::Strlen;
131+
static cl::opt<bool, true>
132+
DisableLIRPStrlen("disable-loop-idiom-strlen",
133+
cl::desc("Proceed with loop idiom recognize pass, but do "
134+
"not convert loop(s) to strlen."),
135+
cl::location(DisableLIRP::Strlen), cl::init(false),
136+
cl::ReallyHidden);
137+
138+
/// Some target libraries have a significant call overhead for `wcslen`,
139+
/// which can degrade performance when the input string is not long enough
140+
/// to justify the cost. To avoid unnecessary performance penalties,
141+
/// we disable it by default.
142+
bool DisableLIRP::Wcslen;
143+
static cl::opt<bool, true>
144+
EnableLIRPWcslen("enable-loop-idiom-wcslen",
145+
cl::desc("Proceed with loop idiom recognize pass, "
146+
"enable conversion of loop(s) to wcslen."),
147+
cl::location(DisableLIRP::Wcslen), cl::init(true),
148+
cl::ReallyHidden);
149+
129150
static cl::opt<bool> UseLIRCodeSizeHeurs(
130151
"use-lir-code-size-heurs",
131152
cl::desc("Use loop idiom recognition code size heuristics when compiling "
@@ -246,6 +267,7 @@ class LoopIdiomRecognize {
246267

247268
bool recognizeShiftUntilBitTest();
248269
bool recognizeShiftUntilZero();
270+
bool recognizeAndInsertStrLen();
249271

250272
/// @}
251273
};
@@ -1494,7 +1516,17 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
14941516

14951517
return recognizePopcount() || recognizeAndInsertFFS() ||
14961518
recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
1497-
recognizeShiftUntilLessThan();
1519+
recognizeShiftUntilLessThan() || recognizeAndInsertStrLen();
1520+
}
1521+
1522+
/// Check if a Value is either a nullptr or a constant int zero
1523+
static bool isZeroConstant(const Value *Val) {
1524+
if (isa<ConstantPointerNull>(Val))
1525+
return true;
1526+
const ConstantInt *CmpZero = dyn_cast<ConstantInt>(Val);
1527+
if (!CmpZero || !CmpZero->isZero())
1528+
return false;
1529+
return true;
14981530
}
14991531

15001532
/// Check if the given conditional branch is based on the comparison between
@@ -1512,8 +1544,7 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
15121544
if (!Cond)
15131545
return nullptr;
15141546

1515-
ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
1516-
if (!CmpZero || !CmpZero->isZero())
1547+
if (!isZeroConstant(Cond->getOperand(1)))
15171548
return nullptr;
15181549

15191550
BasicBlock *TrueSucc = BI->getSuccessor(0);
@@ -1529,6 +1560,276 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
15291560
return nullptr;
15301561
}
15311562

1563+
namespace {
1564+
1565+
class StrlenVerifier {
1566+
public:
1567+
explicit StrlenVerifier(const Loop *CurLoop, ScalarEvolution *SE,
1568+
const TargetLibraryInfo *TLI)
1569+
: CurLoop(CurLoop), SE(SE), TLI(TLI) {}
1570+
1571+
bool isValidStrlenIdiom() {
1572+
// Give up if the loop has multiple blocks, multiple backedges, or
1573+
// multiple exit blocks
1574+
if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1 ||
1575+
!CurLoop->getUniqueExitBlock())
1576+
return false;
1577+
1578+
// It should have a preheader and a branch instruction.
1579+
BasicBlock *Preheader = CurLoop->getLoopPreheader();
1580+
if (!Preheader)
1581+
return false;
1582+
1583+
BranchInst *EntryBI = dyn_cast<BranchInst>(Preheader->getTerminator());
1584+
if (!EntryBI)
1585+
return false;
1586+
1587+
// The loop exit must be conditioned on an icmp with 0 the null terminator.
1588+
// The icmp operand has to be a load on some SSA reg that increments
1589+
// by 1 in the loop.
1590+
BasicBlock *LoopBody = *CurLoop->block_begin();
1591+
1592+
// Skip if the body is too big as it most likely is not a strlen idiom.
1593+
if (!LoopBody || LoopBody->size() >= 15)
1594+
return false;
1595+
1596+
BranchInst *LoopTerm = dyn_cast<BranchInst>(LoopBody->getTerminator());
1597+
Value *LoopCond = matchCondition(LoopTerm, LoopBody);
1598+
if (!LoopCond)
1599+
return false;
1600+
1601+
LoadInst *LoopLoad = dyn_cast<LoadInst>(LoopCond);
1602+
if (!LoopLoad || LoopLoad->getPointerAddressSpace() != 0)
1603+
return false;
1604+
1605+
OperandType = LoopLoad->getType();
1606+
if (!OperandType || !OperandType->isIntegerTy())
1607+
return false;
1608+
1609+
// See if the pointer expression is an AddRec with constant step a of form
1610+
// ({n,+,a}) where a is the width of the char type.
1611+
Value *IncPtr = LoopLoad->getPointerOperand();
1612+
const SCEVAddRecExpr *LoadEv =
1613+
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IncPtr));
1614+
if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
1615+
return false;
1616+
LoadBaseEv = LoadEv->getStart();
1617+
1618+
LLVM_DEBUG({
1619+
dbgs() << "pointer load scev: ";
1620+
LoadEv->print(outs());
1621+
dbgs() << "\n";
1622+
});
1623+
1624+
const SCEVConstant *Step =
1625+
dyn_cast<SCEVConstant>(LoadEv->getStepRecurrence(*SE));
1626+
if (!Step)
1627+
return false;
1628+
1629+
unsigned StepSize = 0;
1630+
StepSizeCI = dyn_cast<ConstantInt>(Step->getValue());
1631+
if (!StepSizeCI)
1632+
return false;
1633+
StepSize = StepSizeCI->getZExtValue();
1634+
1635+
// Verify that StepSize is consistent with platform char width.
1636+
OpWidth = OperandType->getIntegerBitWidth();
1637+
unsigned WcharSize = TLI->getWCharSize(*LoopLoad->getModule());
1638+
if (OpWidth != StepSize * 8)
1639+
return false;
1640+
if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32)
1641+
return false;
1642+
if (OpWidth >= 16)
1643+
if (OpWidth != WcharSize * 8)
1644+
return false;
1645+
1646+
// Scan every instruction in the loop to ensure there are no side effects.
1647+
for (Instruction &I : *LoopBody)
1648+
if (I.mayHaveSideEffects())
1649+
return false;
1650+
1651+
BasicBlock *LoopExitBB = CurLoop->getExitBlock();
1652+
if (!LoopExitBB)
1653+
return false;
1654+
1655+
for (PHINode &PN : LoopExitBB->phis()) {
1656+
if (!SE->isSCEVable(PN.getType()))
1657+
return false;
1658+
1659+
const SCEV *Ev = SE->getSCEV(&PN);
1660+
if (!Ev)
1661+
return false;
1662+
1663+
LLVM_DEBUG({
1664+
dbgs() << "loop exit phi scev: ";
1665+
Ev->print(dbgs());
1666+
dbgs() << "\n";
1667+
});
1668+
1669+
// Since we verified that the loop trip count will be a valid strlen
1670+
// idiom, we can expand all lcssa phi with {n,+,1} as (n + strlen) and use
1671+
// SCEVExpander materialize the loop output.
1672+
const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
1673+
if (!AddRecEv || !AddRecEv->isAffine())
1674+
return false;
1675+
1676+
// We only want RecAddExpr with recurrence step that is constant. This
1677+
// is good enough for all the idioms we want to recognize. Later we expand
1678+
// and materialize the recurrence as {base,+,a} -> (base + a * strlen)
1679+
if (!dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence(*SE)))
1680+
return false;
1681+
}
1682+
1683+
return true;
1684+
}
1685+
1686+
public:
1687+
const Loop *CurLoop;
1688+
ScalarEvolution *SE;
1689+
const TargetLibraryInfo *TLI;
1690+
1691+
unsigned OpWidth;
1692+
ConstantInt *StepSizeCI;
1693+
const SCEV *LoadBaseEv;
1694+
Type *OperandType;
1695+
};
1696+
1697+
} // namespace
1698+
1699+
/// The Strlen Idiom we are trying to detect has the following structure
1700+
///
1701+
/// preheader:
1702+
/// ...
1703+
/// br label %body, ...
1704+
///
1705+
/// body:
1706+
/// ... ; %0 is incremented by a gep
1707+
/// %1 = load i8, ptr %0, align 1
1708+
/// %2 = icmp eq i8 %1, 0
1709+
/// br i1 %2, label %exit, label %body
1710+
///
1711+
/// exit:
1712+
/// %lcssa = phi [%0, %body], ...
1713+
///
1714+
/// We expect the strlen idiom to have a load of a character type that
1715+
/// is compared against '\0', and such load pointer operand must have scev
1716+
/// expression of the form {%str,+,c} where c is a ConstantInt of the
1717+
/// appropriate character width for the idiom, and %str is the base of the string
1718+
/// And, that all lcssa phis have the form {...,+,n} where n is a constant,
1719+
///
1720+
/// When transforming the output of the strlen idiom, the lcssa phis are
1721+
/// expanded using SCEVExpander as {base scev,+,a} -> (base scev + a * strlen)
1722+
/// and all subsequent uses are replaced. For example,
1723+
///
1724+
/// \code{.c}
1725+
/// const char* base = str;
1726+
/// while (*str != '\0')
1727+
/// ++str;
1728+
/// size_t result = str - base;
1729+
/// \endcode
1730+
///
1731+
/// will be transformed as follows: The idiom will be replaced by a strlen
1732+
/// computation to compute the address of the null terminator of the string.
1733+
///
1734+
/// \code{.c}
1735+
/// const char* base = str;
1736+
/// const char* end = base + strlen(str);
1737+
/// size_t result = end - base;
1738+
/// \endcode
1739+
///
1740+
/// In the case we index by an induction variable, as long as the induction
1741+
/// variable has a constant int increment, we can replace all such indvars
1742+
/// with the closed form computation of strlen
1743+
///
1744+
/// \code{.c}
1745+
/// size_t i = 0;
1746+
/// while (str[i] != '\0')
1747+
/// ++i;
1748+
/// size_t result = i;
1749+
/// \endcode
1750+
///
1751+
/// Will be replaced by
1752+
///
1753+
/// \code{.c}
1754+
/// size_t i = 0 + strlen(str);
1755+
/// size_t result = i;
1756+
/// \endcode
1757+
///
1758+
bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
1759+
if (DisableLIRP::All)
1760+
return false;
1761+
1762+
StrlenVerifier Verifier(CurLoop, SE, TLI);
1763+
1764+
if (!Verifier.isValidStrlenIdiom())
1765+
return false;
1766+
1767+
BasicBlock *Preheader = CurLoop->getLoopPreheader();
1768+
BasicBlock *LoopExitBB = CurLoop->getExitBlock();
1769+
1770+
IRBuilder<> Builder(Preheader->getTerminator());
1771+
SCEVExpander Expander(*SE, Preheader->getModule()->getDataLayout(),
1772+
"strlen_idiom");
1773+
Value *MaterialzedBase = Expander.expandCodeFor(
1774+
Verifier.LoadBaseEv, Verifier.LoadBaseEv->getType(),
1775+
Builder.GetInsertPoint());
1776+
1777+
Value *StrLenFunc = nullptr;
1778+
if (Verifier.OpWidth == 8) {
1779+
if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_strlen))
1780+
return false;
1781+
StrLenFunc = emitStrLen(MaterialzedBase, Builder, *DL, TLI);
1782+
} else {
1783+
if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_wcslen) &&
1784+
!DisableLIRP::Wcslen)
1785+
return false;
1786+
StrLenFunc = emitWcsLen(MaterialzedBase, Builder, *DL, TLI);
1787+
}
1788+
assert(StrLenFunc && "Failed to emit strlen function.");
1789+
1790+
const SCEV *StrlenEv = SE->getSCEV(StrLenFunc);
1791+
SmallVector<PHINode *, 4> Cleanup;
1792+
for (PHINode &PN : LoopExitBB->phis()) {
1793+
// We can now materialize the loop output as all phi have scev {base,+,a}.
1794+
// We expand the phi as:
1795+
// %strlen = call i64 @strlen(%str)
1796+
// %phi.new = base expression + step * %strlen
1797+
const SCEV *Ev = SE->getSCEV(&PN);
1798+
const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
1799+
const SCEVConstant *Step =
1800+
dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence(*SE));
1801+
const SCEV *Base = AddRecEv->getStart();
1802+
1803+
// It is safe to truncate to base since if base is narrower than size_t
1804+
// the equivalent user code will have to truncate anyways.
1805+
const SCEV *NewEv = SE->getAddExpr(
1806+
Base, SE->getMulExpr(Step, SE->getTruncateOrSignExtend(
1807+
StrlenEv, Base->getType())));
1808+
1809+
Value *MaterializedPHI = Expander.expandCodeFor(NewEv, NewEv->getType(),
1810+
Builder.GetInsertPoint());
1811+
Expander.clear();
1812+
PN.replaceAllUsesWith(MaterializedPHI);
1813+
Cleanup.push_back(&PN);
1814+
}
1815+
1816+
// All LCSSA Loop Phi are dead, the left over dead loop body can be cleaned
1817+
// up by later passes
1818+
for (PHINode *PN : Cleanup)
1819+
RecursivelyDeleteDeadPHINode(PN);
1820+
SE->forgetLoop(CurLoop);
1821+
1822+
++NumStrLen;
1823+
LLVM_DEBUG(dbgs() << " Formed strlen idiom: " << *StrLenFunc << "\n");
1824+
ORE.emit([&]() {
1825+
return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertStrLen",
1826+
CurLoop->getStartLoc(), Preheader)
1827+
<< "Transformed " << StrLenFunc->getName() << " loop idiom";
1828+
});
1829+
1830+
return true;
1831+
}
1832+
15321833
/// Check if the given conditional branch is based on an unsigned less-than
15331834
/// comparison between a variable and a constant, and if the comparison is false
15341835
/// the control yields to the loop entry. If the branch matches the behaviour,

0 commit comments

Comments
 (0)