Skip to content

Commit ac9049d

Browse files
authored
Reland "[Transforms] LoopIdiomRecognize recognize strlen and wcslen (#108985)" (#131412)
Relands #108985. This PR continues the effort made in https://discourse.llvm.org/t/rfc-strlen-loop-idiom-recognition-folding/55848, https://reviews.llvm.org/D83392 and https://reviews.llvm.org/D88460 to extend `LoopIdiomRecognize` so that it finds loops of the form ```c base = str; while (*str) ++str; ``` and transforms this `strlen` loop idiom into a call to the appropriate library function (`strlen` or `wcslen`), which gives a small performance boost. After the transformation the loop results are computed as ```c str = base + strlen(base) len = str - base ```
1 parent 610ade2 commit ac9049d

File tree

8 files changed

+1266
-5
lines changed

8 files changed

+1266
-5
lines changed

llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ struct DisableLIRP {
3434

3535
/// When true, Memcpy is disabled.
3636
static bool Memcpy;
37+
38+
/// When true, Strlen is disabled.
39+
static bool Strlen;
40+
41+
/// When true, Wcslen is disabled.
42+
static bool Wcslen;
3743
};
3844

3945
/// Performs Loop Idiom Recognize Pass.

llvm/include/llvm/Transforms/Utils/BuildLibCalls.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,12 @@ namespace llvm {
9393
Value *emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
9494
const TargetLibraryInfo *TLI);
9595

96+
/// Emit a call to the wcslen function to the builder, for the specified
97+
/// pointer. Ptr is required to be some pointer type, and the return value has
98+
/// 'size_t' type.
99+
Value *emitWcsLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
100+
const TargetLibraryInfo *TLI);
101+
96102
/// Emit a call to the strdup function to the builder, for the specified
97103
/// pointer. Ptr is required to be some pointer type, and the return value has
98104
/// 'i8*' type.

llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Lines changed: 305 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@
2020
//
2121
// TODO List:
2222
//
23-
// Future loop memory idioms to recognize:
24-
// memcmp, strlen, etc.
23+
// Future loop memory idioms to recognize: memcmp, etc.
2524
//
2625
// This could recognize common matrix multiplies and dot product idioms and
2726
// replace them with calls to BLAS (if linked in??).
@@ -33,6 +32,7 @@
3332
#include "llvm/ADT/ArrayRef.h"
3433
#include "llvm/ADT/DenseMap.h"
3534
#include "llvm/ADT/MapVector.h"
35+
#include "llvm/ADT/STLExtras.h"
3636
#include "llvm/ADT/SetVector.h"
3737
#include "llvm/ADT/SmallPtrSet.h"
3838
#include "llvm/ADT/SmallVector.h"
@@ -97,6 +97,7 @@ using namespace llvm;
9797
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
9898
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
9999
STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
100+
STATISTIC(NumStrLen, "Number of strlen's and wcslen's formed from loop loads");
100101
STATISTIC(
101102
NumShiftUntilBitTest,
102103
"Number of uncountable loops recognized as 'shift until bitttest' idiom");
@@ -126,6 +127,22 @@ static cl::opt<bool, true>
126127
cl::location(DisableLIRP::Memcpy), cl::init(false),
127128
cl::ReallyHidden);
128129

130+
bool DisableLIRP::Strlen;
131+
static cl::opt<bool, true>
132+
DisableLIRPStrlen("disable-loop-idiom-strlen",
133+
cl::desc("Proceed with loop idiom recognize pass, but do "
134+
"not convert loop(s) to strlen."),
135+
cl::location(DisableLIRP::Strlen), cl::init(false),
136+
cl::ReallyHidden);
137+
138+
bool DisableLIRP::Wcslen;
139+
static cl::opt<bool, true>
140+
EnableLIRPWcslen("disable-loop-idiom-wcslen",
141+
cl::desc("Proceed with loop idiom recognize pass, "
142+
"enable conversion of loop(s) to wcslen."),
143+
cl::location(DisableLIRP::Wcslen), cl::init(false),
144+
cl::ReallyHidden);
145+
129146
static cl::opt<bool> UseLIRCodeSizeHeurs(
130147
"use-lir-code-size-heurs",
131148
cl::desc("Use loop idiom recognition code size heuristics when compiling "
@@ -246,6 +263,7 @@ class LoopIdiomRecognize {
246263

247264
bool recognizeShiftUntilBitTest();
248265
bool recognizeShiftUntilZero();
266+
bool recognizeAndInsertStrLen();
249267

250268
/// @}
251269
};
@@ -1494,7 +1512,17 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
14941512

14951513
return recognizePopcount() || recognizeAndInsertFFS() ||
14961514
recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
1497-
recognizeShiftUntilLessThan();
1515+
recognizeShiftUntilLessThan() || recognizeAndInsertStrLen();
1516+
}
1517+
1518+
/// Check if a Value is either a nullptr or a constant int zero
1519+
static bool isZeroConstant(const Value *Val) {
1520+
if (isa<ConstantPointerNull>(Val))
1521+
return true;
1522+
const ConstantInt *CmpZero = dyn_cast<ConstantInt>(Val);
1523+
if (!CmpZero || !CmpZero->isZero())
1524+
return false;
1525+
return true;
14981526
}
14991527

15001528
/// Check if the given conditional branch is based on the comparison between
@@ -1512,8 +1540,7 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
15121540
if (!Cond)
15131541
return nullptr;
15141542

1515-
ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
1516-
if (!CmpZero || !CmpZero->isZero())
1543+
if (!isZeroConstant(Cond->getOperand(1)))
15171544
return nullptr;
15181545

15191546
BasicBlock *TrueSucc = BI->getSuccessor(0);
@@ -1529,6 +1556,279 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
15291556
return nullptr;
15301557
}
15311558

1559+
namespace {
1560+
1561+
class StrlenVerifier {
1562+
public:
1563+
explicit StrlenVerifier(const Loop *CurLoop, ScalarEvolution *SE,
1564+
const TargetLibraryInfo *TLI)
1565+
: CurLoop(CurLoop), SE(SE), TLI(TLI) {}
1566+
1567+
bool isValidStrlenIdiom() {
1568+
// Give up if the loop has multiple blocks, multiple backedges, or
1569+
// multiple exit blocks
1570+
if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1 ||
1571+
!CurLoop->getUniqueExitBlock())
1572+
return false;
1573+
1574+
// It should have a preheader and a branch instruction.
1575+
BasicBlock *Preheader = CurLoop->getLoopPreheader();
1576+
if (!Preheader)
1577+
return false;
1578+
1579+
BranchInst *EntryBI = dyn_cast<BranchInst>(Preheader->getTerminator());
1580+
if (!EntryBI)
1581+
return false;
1582+
1583+
// The loop exit must be conditioned on an icmp with 0 the null terminator.
1584+
// The icmp operand has to be a load on some SSA reg that increments
1585+
// by 1 in the loop.
1586+
BasicBlock *LoopBody = *CurLoop->block_begin();
1587+
1588+
// Skip if the body is too big as it most likely is not a strlen idiom.
1589+
if (!LoopBody || LoopBody->size() >= 15)
1590+
return false;
1591+
1592+
BranchInst *LoopTerm = dyn_cast<BranchInst>(LoopBody->getTerminator());
1593+
Value *LoopCond = matchCondition(LoopTerm, LoopBody);
1594+
if (!LoopCond)
1595+
return false;
1596+
1597+
LoadInst *LoopLoad = dyn_cast<LoadInst>(LoopCond);
1598+
if (!LoopLoad || LoopLoad->getPointerAddressSpace() != 0)
1599+
return false;
1600+
1601+
OperandType = LoopLoad->getType();
1602+
if (!OperandType || !OperandType->isIntegerTy())
1603+
return false;
1604+
1605+
// See if the pointer expression is an AddRec with constant step a of form
1606+
// ({n,+,a}) where a is the width of the char type.
1607+
Value *IncPtr = LoopLoad->getPointerOperand();
1608+
const SCEVAddRecExpr *LoadEv =
1609+
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IncPtr));
1610+
if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
1611+
return false;
1612+
LoadBaseEv = LoadEv->getStart();
1613+
1614+
LLVM_DEBUG({
1615+
dbgs() << "pointer load scev: ";
1616+
LoadEv->print(outs());
1617+
dbgs() << "\n";
1618+
});
1619+
1620+
const SCEVConstant *Step =
1621+
dyn_cast<SCEVConstant>(LoadEv->getStepRecurrence(*SE));
1622+
if (!Step)
1623+
return false;
1624+
1625+
unsigned StepSize = 0;
1626+
StepSizeCI = dyn_cast<ConstantInt>(Step->getValue());
1627+
if (!StepSizeCI)
1628+
return false;
1629+
StepSize = StepSizeCI->getZExtValue();
1630+
1631+
// Verify that StepSize is consistent with platform char width.
1632+
OpWidth = OperandType->getIntegerBitWidth();
1633+
unsigned WcharSize = TLI->getWCharSize(*LoopLoad->getModule());
1634+
if (OpWidth != StepSize * 8)
1635+
return false;
1636+
if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32)
1637+
return false;
1638+
if (OpWidth >= 16)
1639+
if (OpWidth != WcharSize * 8)
1640+
return false;
1641+
1642+
// Scan every instruction in the loop to ensure there are no side effects.
1643+
for (Instruction &I : *LoopBody)
1644+
if (I.mayHaveSideEffects())
1645+
return false;
1646+
1647+
BasicBlock *LoopExitBB = CurLoop->getExitBlock();
1648+
if (!LoopExitBB)
1649+
return false;
1650+
1651+
for (PHINode &PN : LoopExitBB->phis()) {
1652+
if (!SE->isSCEVable(PN.getType()))
1653+
return false;
1654+
1655+
const SCEV *Ev = SE->getSCEV(&PN);
1656+
if (!Ev)
1657+
return false;
1658+
1659+
LLVM_DEBUG({
1660+
dbgs() << "loop exit phi scev: ";
1661+
Ev->print(dbgs());
1662+
dbgs() << "\n";
1663+
});
1664+
1665+
// Since we verified that the loop trip count will be a valid strlen
1666+
// idiom, we can expand all lcssa phi with {n,+,1} as (n + strlen) and use
1667+
// SCEVExpander materialize the loop output.
1668+
const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
1669+
if (!AddRecEv || !AddRecEv->isAffine())
1670+
return false;
1671+
1672+
// We only want RecAddExpr with recurrence step that is constant. This
1673+
// is good enough for all the idioms we want to recognize. Later we expand
1674+
// and materialize the recurrence as {base,+,a} -> (base + a * strlen)
1675+
if (!dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence(*SE)))
1676+
return false;
1677+
}
1678+
1679+
return true;
1680+
}
1681+
1682+
public:
1683+
const Loop *CurLoop;
1684+
ScalarEvolution *SE;
1685+
const TargetLibraryInfo *TLI;
1686+
1687+
unsigned OpWidth;
1688+
ConstantInt *StepSizeCI;
1689+
const SCEV *LoadBaseEv;
1690+
Type *OperandType;
1691+
};
1692+
1693+
} // namespace
1694+
1695+
/// The Strlen Idiom we are trying to detect has the following structure
1696+
///
1697+
/// preheader:
1698+
/// ...
1699+
/// br label %body, ...
1700+
///
1701+
/// body:
1702+
/// ... ; %0 is incremented by a gep
1703+
/// %1 = load i8, ptr %0, align 1
1704+
/// %2 = icmp eq i8 %1, 0
1705+
/// br i1 %2, label %exit, label %body
1706+
///
1707+
/// exit:
1708+
/// %lcssa = phi [%0, %body], ...
1709+
///
1710+
/// We expect the strlen idiom to have a load of a character type that
1711+
/// is compared against '\0', and such load pointer operand must have scev
1712+
/// expression of the form {%str,+,c} where c is a ConstantInt of the
1713+
/// appropiate character width for the idiom, and %str is the base of the string
1714+
/// And, that all lcssa phis have the form {...,+,n} where n is a constant,
1715+
///
1716+
/// When transforming the output of the strlen idiom, the lccsa phi are
1717+
/// expanded using SCEVExpander as {base scev,+,a} -> (base scev + a * strlen)
1718+
/// and all subsequent uses are replaced. For example,
1719+
///
1720+
/// \code{.c}
1721+
/// const char* base = str;
1722+
/// while (*str != '\0')
1723+
/// ++str;
1724+
/// size_t result = str - base;
1725+
/// \endcode
1726+
///
1727+
/// will be transformed as follows: The idiom will be replaced by a strlen
1728+
/// computation to compute the address of the null terminator of the string.
1729+
///
1730+
/// \code{.c}
1731+
/// const char* base = str;
1732+
/// const char* end = base + strlen(str);
1733+
/// size_t result = end - base;
1734+
/// \endcode
1735+
///
1736+
/// In the case we index by an induction variable, as long as the induction
1737+
/// variable has a constant int increment, we can replace all such indvars
1738+
/// with the closed form computation of strlen
1739+
///
1740+
/// \code{.c}
1741+
/// size_t i = 0;
1742+
/// while (str[i] != '\0')
1743+
/// ++i;
1744+
/// size_t result = i;
1745+
/// \endcode
1746+
///
1747+
/// Will be replaced by
1748+
///
1749+
/// \code{.c}
1750+
/// size_t i = 0 + strlen(str);
1751+
/// size_t result = i;
1752+
/// \endcode
1753+
///
1754+
bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
1755+
if (DisableLIRP::All)
1756+
return false;
1757+
1758+
StrlenVerifier Verifier(CurLoop, SE, TLI);
1759+
1760+
if (!Verifier.isValidStrlenIdiom())
1761+
return false;
1762+
1763+
BasicBlock *Preheader = CurLoop->getLoopPreheader();
1764+
BasicBlock *LoopExitBB = CurLoop->getExitBlock();
1765+
1766+
IRBuilder<> Builder(Preheader->getTerminator());
1767+
SCEVExpander Expander(*SE, Preheader->getModule()->getDataLayout(),
1768+
"strlen_idiom");
1769+
Value *MaterialzedBase = Expander.expandCodeFor(
1770+
Verifier.LoadBaseEv, Verifier.LoadBaseEv->getType(),
1771+
Builder.GetInsertPoint());
1772+
1773+
Value *StrLenFunc = nullptr;
1774+
if (Verifier.OpWidth == 8) {
1775+
if (DisableLIRP::Strlen)
1776+
return false;
1777+
if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_strlen))
1778+
return false;
1779+
StrLenFunc = emitStrLen(MaterialzedBase, Builder, *DL, TLI);
1780+
} else {
1781+
if (DisableLIRP::Wcslen)
1782+
return false;
1783+
if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_wcslen))
1784+
return false;
1785+
StrLenFunc = emitWcsLen(MaterialzedBase, Builder, *DL, TLI);
1786+
}
1787+
assert(StrLenFunc && "Failed to emit strlen function.");
1788+
1789+
const SCEV *StrlenEv = SE->getSCEV(StrLenFunc);
1790+
SmallVector<PHINode *, 4> Cleanup;
1791+
for (PHINode &PN : LoopExitBB->phis()) {
1792+
// We can now materialize the loop output as all phi have scev {base,+,a}.
1793+
// We expand the phi as:
1794+
// %strlen = call i64 @strlen(%str)
1795+
// %phi.new = base expression + step * %strlen
1796+
const SCEV *Ev = SE->getSCEV(&PN);
1797+
const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
1798+
const SCEVConstant *Step =
1799+
dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence(*SE));
1800+
const SCEV *Base = AddRecEv->getStart();
1801+
1802+
// It is safe to truncate to base since if base is narrower than size_t
1803+
// the equivalent user code will have to truncate anyways.
1804+
const SCEV *NewEv = SE->getAddExpr(
1805+
Base, SE->getMulExpr(Step, SE->getTruncateOrSignExtend(
1806+
StrlenEv, Base->getType())));
1807+
1808+
Value *MaterializedPHI = Expander.expandCodeFor(NewEv, NewEv->getType(),
1809+
Builder.GetInsertPoint());
1810+
Expander.clear();
1811+
PN.replaceAllUsesWith(MaterializedPHI);
1812+
Cleanup.push_back(&PN);
1813+
}
1814+
1815+
// All LCSSA Loop Phi are dead, the left over dead loop body can be cleaned
1816+
// up by later passes
1817+
for (PHINode *PN : Cleanup)
1818+
RecursivelyDeleteDeadPHINode(PN);
1819+
SE->forgetLoop(CurLoop);
1820+
1821+
++NumStrLen;
1822+
LLVM_DEBUG(dbgs() << " Formed strlen idiom: " << *StrLenFunc << "\n");
1823+
ORE.emit([&]() {
1824+
return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertStrLen",
1825+
CurLoop->getStartLoc(), Preheader)
1826+
<< "Transformed " << StrLenFunc->getName() << " loop idiom";
1827+
});
1828+
1829+
return true;
1830+
}
1831+
15321832
/// Check if the given conditional branch is based on an unsigned less-than
15331833
/// comparison between a variable and a constant, and if the comparison is false
15341834
/// the control yields to the loop entry. If the branch matches the behaviour,

0 commit comments

Comments
 (0)