Skip to content

Commit 4ea7329

Browse files
committed
[InlineCost]: Optimize inlining of recursive function.
- Consider inlining recursive function of depth 1 only when the caller is the function itself instead of inlining it for each callsite so that we avoid redundant work. - Use CondContext instead of DomTree for better compilation time.
1 parent 3f0cabb commit 4ea7329

File tree

2 files changed

+52
-60
lines changed

2 files changed

+52
-60
lines changed

llvm/lib/Analysis/InlineCost.cpp

Lines changed: 44 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1688,66 +1688,52 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
16881688
if (!isa<Argument>(Cmp.getOperand(0)) || !isa<Constant>(Cmp.getOperand(1)))
16891689
return false;
16901690
auto *CmpOp = Cmp.getOperand(0);
1691-
Function *F = Cmp.getFunction();
1692-
// Iterate over the users of the function to check if it's a recursive
1693-
// function:
1694-
for (auto *U : F->users()) {
1695-
CallInst *Call = dyn_cast<CallInst>(U);
1696-
if (!Call || Call->getFunction() != F || Call->getCalledFunction() != F)
1697-
continue;
1698-
auto *CallBB = Call->getParent();
1699-
auto *Predecessor = CallBB->getSinglePredecessor();
1700-
// Only handle the case when the callsite has a single predecessor:
1701-
if (!Predecessor)
1702-
continue;
1691+
// Make sure that the callsite is recursive:
1692+
if (CandidateCall.getCaller() != &F)
1693+
return false;
1694+
CallInst *CallInstr = dyn_cast<CallInst>(&CandidateCall);
1695+
// Only handle the case when the callsite has a single predecessor:
1696+
auto *CallBB = CallInstr->getParent();
1697+
auto *Predecessor = CallBB->getSinglePredecessor();
1698+
if (!Predecessor)
1699+
return false;
1700+
// Check if the callsite is guarded by the same Cmp instruction:
1701+
auto *Br = dyn_cast<BranchInst>(Predecessor->getTerminator());
1702+
if (!Br || Br->isUnconditional() || Br->getCondition() != &Cmp)
1703+
return false;
17031704

1704-
auto *Br = dyn_cast<BranchInst>(Predecessor->getTerminator());
1705-
if (!Br || Br->isUnconditional())
1706-
continue;
1707-
// Check if the Br condition is the same Cmp instr we are investigating:
1708-
if (Br->getCondition() != &Cmp)
1709-
continue;
1710-
// Check if there are any arg of the recursive callsite is affecting the cmp
1711-
// instr:
1712-
bool ArgFound = false;
1713-
Value *FuncArg = nullptr, *CallArg = nullptr;
1714-
for (unsigned ArgNum = 0;
1715-
ArgNum < F->arg_size() && ArgNum < Call->arg_size(); ArgNum++) {
1716-
FuncArg = F->getArg(ArgNum);
1717-
CallArg = Call->getArgOperand(ArgNum);
1718-
if (FuncArg == CmpOp && CallArg != CmpOp) {
1719-
ArgFound = true;
1720-
break;
1721-
}
1722-
}
1723-
if (!ArgFound)
1724-
continue;
1725-
// Now we have a recursive call that is guarded by a cmp instruction.
1726-
// Check if this cmp can be simplified:
1727-
SimplifyQuery SQ(DL, dyn_cast<Instruction>(CallArg));
1728-
DomConditionCache DC;
1729-
DC.registerBranch(Br);
1730-
SQ.DC = &DC;
1731-
if (DT.root_size() == 0) {
1732-
// Dominator tree was never constructed for any function yet.
1733-
DT.recalculate(*F);
1734-
} else if (DT.getRoot()->getParent() != F) {
1735-
// Dominator tree was constructed for a different function, recalculate
1736-
// it for the current function.
1737-
DT.recalculate(*F);
1705+
// Check if there is any arg of the recursive callsite is affecting the cmp
1706+
// instr:
1707+
bool ArgFound = false;
1708+
Value *FuncArg = nullptr, *CallArg = nullptr;
1709+
for (unsigned ArgNum = 0;
1710+
ArgNum < F.arg_size() && ArgNum < CallInstr->arg_size(); ArgNum++) {
1711+
FuncArg = F.getArg(ArgNum);
1712+
CallArg = CallInstr->getArgOperand(ArgNum);
1713+
if (FuncArg == CmpOp && CallArg != CmpOp) {
1714+
ArgFound = true;
1715+
break;
17381716
}
1739-
SQ.DT = &DT;
1740-
Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands(
1741-
cast<CmpInst>(&Cmp), {CallArg, Cmp.getOperand(1)}, SQ);
1742-
if (auto *ConstVal = dyn_cast_or_null<ConstantInt>(SimplifiedInstruction)) {
1743-
bool IsTrueSuccessor = CallBB == Br->getSuccessor(0);
1744-
// Make sure that the BB of the recursive call is NOT the next successor
1745-
// of the icmp. In other words, make sure that the recursion depth is 1.
1746-
if ((ConstVal->isOne() && !IsTrueSuccessor) ||
1747-
(ConstVal->isZero() && IsTrueSuccessor)) {
1748-
SimplifiedValues[&Cmp] = ConstVal;
1749-
return true;
1750-
}
1717+
}
1718+
if (!ArgFound)
1719+
return false;
1720+
1721+
// Now we have a recursive call that is guarded by a cmp instruction.
1722+
// Check if this cmp can be simplified:
1723+
SimplifyQuery SQ(DL, dyn_cast<Instruction>(CallArg));
1724+
CondContext CC(cast<Value>(&Cmp));
1725+
CC.CondIsTrue = CallBB == Br->getSuccessor(0);
1726+
SQ.CC = &CC;
1727+
CC.AffectedValues.insert(FuncArg);
1728+
Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands(
1729+
cast<CmpInst>(&Cmp), {CallArg, Cmp.getOperand(1)}, SQ);
1730+
if (auto *ConstVal = dyn_cast_or_null<ConstantInt>(SimplifiedInstruction)) {
1731+
// Make sure that the BB of the recursive call is NOT the true successor
1732+
// of the icmp. In other words, make sure that the recursion depth is 1.
1733+
if ((ConstVal->isOne() && !CC.CondIsTrue) ||
1734+
(ConstVal->isZero() && CC.CondIsTrue)) {
1735+
SimplifiedValues[&Cmp] = ConstVal;
1736+
return true;
17511737
}
17521738
}
17531739
return false;

llvm/test/Transforms/Inline/inline-recursive-fn2.ll

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,21 @@
22

33
; CHECK: Inlining calls in: test
44
; CHECK: Function size: 2
5-
; CHECK: Inlining (cost=-35, threshold=337), Call: %call = tail call float @inline_rec_true_successor(float %x, float %scale)
6-
; CHECK: Size after inlining: 10
5+
; CHECK: NOT Inlining (cost=never): recursive, Call: %call = tail call float @inline_rec_true_successor(float %x, float %scale)
76

87
; CHECK: Inlining calls in: inline_rec_true_successor
98
; CHECK: Function size: 10
109
; CHECK: Inlining (cost=-35, threshold=337), Call: %call = tail call float @inline_rec_true_successor(float %fneg, float %scale)
1110
; CHECK: Size after inlining: 17
1211
; CHECK: NOT Inlining (cost=never): noinline function attribute, Call: %call_test = tail call float @test(float %fneg, float %common.ret18.op.i)
12+
; CHECK: NOT Inlining (cost=never): noinline function attribute, Call: %call_test.i = tail call float @test(float %x, float %call.i)
13+
; CHECK: Skipping inlining due to history: inline_rec_true_successor -> inline_rec_true_successor
14+
; CHECK: Updated inlining SCC: (test, inline_rec_true_successor)
1315

16+
; CHECK: Inlining calls in: test
17+
; CHECK: Function size: 2
18+
; CHECK: Inlining (cost=25, threshold=225), Call: %call = tail call float @inline_rec_true_successor(float %x, float %scale)
19+
; CHECK: Size after inlining: 10
1420

1521
define float @test(float %x, float %scale) noinline {
1622
entry:

0 commit comments

Comments
 (0)