Skip to content

Commit 53b68e6

Browse files
Resubmit: [JumpThreading] Thread jumps through two basic blocks
This reverts commit 2d258ed.

This revision fixes the Windows build and adds a testcase for it, namely thread-two-bbs3.ll. My original patch improperly copied EH pads on Windows. This patch disregards jump threading opportunities having to do with EH pads.

[JumpThreading] Thread jumps through two basic blocks

Summary:
This patch teaches JumpThreading.cpp to thread through two basic blocks like:

  bb3:
    %var = phi i32* [ null, %bb1 ], [ @A, %bb2 ]
    %tobool = icmp eq i32 %cond, 0
    br i1 %tobool, label %bb4, label ...

  bb4:
    %cmp = icmp eq i32* %var, null
    br i1 %cmp, label %bb5, label %bb6

by duplicating basic blocks like bb3 above. Once we duplicate bb3 as bb3.dup and redirect edge bb1->bb3 to bb1->bb3.dup, we have:

  bb3:
    %var = phi i32* [ @A, %bb2 ]
    %tobool = icmp eq i32 %cond, 0
    br i1 %tobool, label %bb4, label ...

  bb3.dup:
    %var = phi i32* [ null, %bb1 ]
    %tobool = icmp eq i32 %cond, 0
    br i1 %tobool, label %bb4, label ...

  bb4:
    %cmp = icmp eq i32* %var, null
    br i1 %cmp, label %bb5, label %bb6

Then the existing code in JumpThreading.cpp can thread edge bb3.dup->bb4 through bb4 and eventually create bb3.dup->bb5.

Reviewers: wmi
Subscribers: hiraditya, jfb, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70247
1 parent 6f3effb commit 53b68e6

File tree

5 files changed

+391
-2
lines changed

5 files changed

+391
-2
lines changed

llvm/include/llvm/Transforms/Scalar/JumpThreading.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,11 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
139139
RecursionSet, CxtI);
140140
}
141141

142+
Constant *EvaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB,
143+
Value *cond);
144+
bool MaybeThreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond);
145+
void ThreadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB,
146+
BasicBlock *BB, BasicBlock *SuccBB);
142147
bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
143148
jumpthreading::ConstantPreference Preference,
144149
Instruction *CxtI = nullptr);

llvm/lib/Transforms/Scalar/JumpThreading.cpp

Lines changed: 232 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1548,6 +1548,52 @@ FindMostPopularDest(BasicBlock *BB,
15481548
return MostPopularDest;
15491549
}
15501550

1551+
// Try to evaluate the value of V when the control flows from PredPredBB to
1552+
// BB->getSinglePredecessor() and then on to BB.
1553+
Constant *JumpThreadingPass::EvaluateOnPredecessorEdge(BasicBlock *BB,
1554+
BasicBlock *PredPredBB,
1555+
Value *V) {
1556+
BasicBlock *PredBB = BB->getSinglePredecessor();
1557+
assert(PredBB && "Expected a single predecessor");
1558+
1559+
if (Constant *Cst = dyn_cast<Constant>(V)) {
1560+
return Cst;
1561+
}
1562+
1563+
// Consult LVI if V is not an instruction in BB or PredBB.
1564+
Instruction *I = dyn_cast<Instruction>(V);
1565+
if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {
1566+
if (DTU->hasPendingDomTreeUpdates())
1567+
LVI->disableDT();
1568+
else
1569+
LVI->enableDT();
1570+
return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);
1571+
}
1572+
1573+
// Look into a PHI argument.
1574+
if (PHINode *PHI = dyn_cast<PHINode>(V)) {
1575+
if (PHI->getParent() == PredBB)
1576+
return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));
1577+
return nullptr;
1578+
}
1579+
1580+
// If we have a CmpInst, try to fold it for each incoming edge into PredBB.
1581+
if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
1582+
if (CondCmp->getParent() == BB) {
1583+
Constant *Op0 =
1584+
EvaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0));
1585+
Constant *Op1 =
1586+
EvaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1));
1587+
if (Op0 && Op1) {
1588+
return ConstantExpr::getCompare(CondCmp->getPredicate(), Op0, Op1);
1589+
}
1590+
}
1591+
return nullptr;
1592+
}
1593+
1594+
return nullptr;
1595+
}
1596+
15511597
bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
15521598
ConstantPreference Preference,
15531599
Instruction *CxtI) {
@@ -1557,8 +1603,12 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
15571603
return false;
15581604

15591605
PredValueInfoTy PredValues;
1560-
if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference, CxtI))
1561-
return false;
1606+
if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
1607+
CxtI)) {
1608+
// We don't have known values in predecessors. See if we can thread through
1609+
// BB and its sole predecessor.
1610+
return MaybeThreadThroughTwoBasicBlocks(BB, Cond);
1611+
}
15621612

15631613
assert(!PredValues.empty() &&
15641614
"ComputeValueKnownInPredecessors returned true with no values");
@@ -2015,6 +2065,186 @@ JumpThreadingPass::CloneInstructions(BasicBlock::iterator BI,
20152065
return ValueMapping;
20162066
}
20172067

2068+
/// Attempt to thread through two successive basic blocks.
2069+
bool JumpThreadingPass::MaybeThreadThroughTwoBasicBlocks(BasicBlock *BB,
2070+
Value *Cond) {
2071+
// Consider:
2072+
//
2073+
// PredBB:
2074+
// %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
2075+
// %tobool = icmp eq i32 %cond, 0
2076+
// br i1 %tobool, label %BB, label ...
2077+
//
2078+
// BB:
2079+
// %cmp = icmp eq i32* %var, null
2080+
// br i1 %cmp, label ..., label ...
2081+
//
2082+
// We don't know the value of %var at BB even if we know which incoming edge
2083+
// we take to BB. However, once we duplicate PredBB for each of its incoming
2084+
// edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
2085+
// PredBB. Then we can thread edges PredBB1->BB and PredBB2->BB through BB.
2086+
2087+
// Require that BB end with a Branch for simplicity.
2088+
BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
2089+
if (!CondBr)
2090+
return false;
2091+
2092+
// BB must have exactly one predecessor.
2093+
BasicBlock *PredBB = BB->getSinglePredecessor();
2094+
if (!PredBB)
2095+
return false;
2096+
2097+
// Require that PredBB end with a Branch. If PredBB ends with an
2098+
// unconditional branch, we should be merging PredBB and BB instead. For
2099+
// simplicity, we don't deal with a switch.
2100+
BranchInst *PredBBBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
2101+
if (!PredBBBranch)
2102+
return false;
2103+
2104+
// If PredBB has exactly one incoming edge, we don't gain anything by copying
2105+
// PredBB.
2106+
if (PredBB->getSinglePredecessor())
2107+
return false;
2108+
2109+
// Don't thread across a loop header.
2110+
if (LoopHeaders.count(PredBB))
2111+
return false;
2112+
2113+
// Avoid complication with duplicating EH pads.
2114+
if (PredBB->isEHPad())
2115+
return false;
2116+
2117+
// Find a predecessor that we can thread. For simplicity, we only consider a
2118+
// successor edge out of BB to which we thread exactly one incoming edge into
2119+
// PredBB.
2120+
unsigned ZeroCount = 0;
2121+
unsigned OneCount = 0;
2122+
BasicBlock *ZeroPred = nullptr;
2123+
BasicBlock *OnePred = nullptr;
2124+
for (BasicBlock *P : predecessors(PredBB)) {
2125+
if (Constant *Cst = EvaluateOnPredecessorEdge(BB, P, Cond)) {
2126+
if (Cst->isZeroValue()) {
2127+
ZeroCount++;
2128+
ZeroPred = P;
2129+
} else {
2130+
OneCount++;
2131+
OnePred = P;
2132+
}
2133+
}
2134+
}
2135+
2136+
// Disregard complicated cases where we have to thread multiple edges.
2137+
BasicBlock *PredPredBB;
2138+
if (ZeroCount == 1) {
2139+
PredPredBB = ZeroPred;
2140+
} else if (OneCount == 1) {
2141+
PredPredBB = OnePred;
2142+
} else {
2143+
return false;
2144+
}
2145+
2146+
BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);
2147+
2148+
// If threading to the same block as we come from, we would infinite loop.
2149+
if (SuccBB == BB) {
2150+
LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
2151+
<< "' - would thread to self!\n");
2152+
return false;
2153+
}
2154+
2155+
// If threading this would thread across a loop header, don't thread the edge.
2156+
// See the comments above FindLoopHeaders for justifications and caveats.
2157+
if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
2158+
LLVM_DEBUG({
2159+
bool BBIsHeader = LoopHeaders.count(BB);
2160+
bool SuccIsHeader = LoopHeaders.count(SuccBB);
2161+
dbgs() << " Not threading across "
2162+
<< (BBIsHeader ? "loop header BB '" : "block BB '")
2163+
<< BB->getName() << "' to dest "
2164+
<< (SuccIsHeader ? "loop header BB '" : "block BB '")
2165+
<< SuccBB->getName()
2166+
<< "' - it might create an irreducible loop!\n";
2167+
});
2168+
return false;
2169+
}
2170+
2171+
// Check the cost of duplicating BB and PredBB.
2172+
unsigned JumpThreadCost =
2173+
getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
2174+
JumpThreadCost += getJumpThreadDuplicationCost(
2175+
PredBB, PredBB->getTerminator(), BBDupThreshold);
2176+
if (JumpThreadCost > BBDupThreshold) {
2177+
LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
2178+
<< "' - Cost is too high: " << JumpThreadCost << "\n");
2179+
return false;
2180+
}
2181+
2182+
// Now we are ready to duplicate PredBB.
2183+
ThreadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
2184+
return true;
2185+
}
2186+
2187+
void JumpThreadingPass::ThreadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
2188+
BasicBlock *PredBB,
2189+
BasicBlock *BB,
2190+
BasicBlock *SuccBB) {
2191+
LLVM_DEBUG(dbgs() << " Threading through '" << PredBB->getName() << "' and '"
2192+
<< BB->getName() << "'\n");
2193+
2194+
BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());
2195+
BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());
2196+
2197+
BasicBlock *NewBB =
2198+
BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",
2199+
PredBB->getParent(), PredBB);
2200+
NewBB->moveAfter(PredBB);
2201+
2202+
// Set the block frequency of NewBB.
2203+
if (HasProfileData) {
2204+
auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
2205+
BPI->getEdgeProbability(PredPredBB, PredBB);
2206+
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
2207+
}
2208+
2209+
// We are going to have to map operands from the original BB block to the new
2210+
// copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them
2211+
// to account for entry from PredPredBB.
2212+
DenseMap<Instruction *, Value *> ValueMapping =
2213+
CloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);
2214+
2215+
// Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
2216+
// This eliminates predecessors from PredPredBB, which requires us to simplify
2217+
// any PHI nodes in PredBB.
2218+
Instruction *PredPredTerm = PredPredBB->getTerminator();
2219+
for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
2220+
if (PredPredTerm->getSuccessor(i) == PredBB) {
2221+
PredBB->removePredecessor(PredPredBB, true);
2222+
PredPredTerm->setSuccessor(i, NewBB);
2223+
}
2224+
2225+
AddPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
2226+
ValueMapping);
2227+
AddPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
2228+
ValueMapping);
2229+
2230+
DTU->applyUpdatesPermissive(
2231+
{{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},
2232+
{DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},
2233+
{DominatorTree::Insert, PredPredBB, NewBB},
2234+
{DominatorTree::Delete, PredPredBB, PredBB}});
2235+
2236+
UpdateSSA(PredBB, NewBB, ValueMapping);
2237+
2238+
// Clean up things like PHI nodes with single operands, dead instructions,
2239+
// etc.
2240+
SimplifyInstructionsInBlock(NewBB, TLI);
2241+
SimplifyInstructionsInBlock(PredBB, TLI);
2242+
2243+
SmallVector<BasicBlock *, 1> PredsToFactor;
2244+
PredsToFactor.push_back(NewBB);
2245+
ThreadEdge(BB, PredsToFactor, SuccBB);
2246+
}
2247+
20182248
/// TryThreadEdge - Thread an edge if it's safe and profitable to do so.
20192249
bool JumpThreadingPass::TryThreadEdge(
20202250
BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
; RUN: opt < %s -jump-threading -S -verify | FileCheck %s
2+
3+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
4+
target triple = "x86_64-unknown-linux-gnu"
5+
6+
@a = global i32 0, align 4
7+
8+
define void @foo(i32 %cond1, i32 %cond2) {
9+
; CHECK-LABEL: @foo
10+
; CHECK-LABEL: entry
11+
entry:
12+
%tobool = icmp eq i32 %cond1, 0
13+
br i1 %tobool, label %bb.cond2, label %bb.f1
14+
15+
bb.f1:
16+
call void @f1()
17+
br label %bb.cond2
18+
; Verify that we branch on cond2 without checking ptr.
19+
; CHECK: call void @f1()
20+
; CHECK-NEXT: icmp eq i32 %cond2, 0
21+
; CHECK-NEXT: label %bb.f4, label %bb.f2
22+
23+
bb.cond2:
24+
%ptr = phi i32* [ null, %bb.f1 ], [ @a, %entry ]
25+
%tobool1 = icmp eq i32 %cond2, 0
26+
br i1 %tobool1, label %bb.file, label %bb.f2
27+
; Verify that we branch on cond2 without checking ptr.
28+
; CHECK: icmp eq i32 %cond2, 0
29+
; CHECK-NEXT: label %bb.f3, label %bb.f2
30+
31+
bb.f2:
32+
call void @f2()
33+
br label %exit
34+
35+
; Verify that we eliminate this basic block.
36+
; CHECK-NOT: bb.file:
37+
bb.file:
38+
%cmp = icmp eq i32* %ptr, null
39+
br i1 %cmp, label %bb.f4, label %bb.f3
40+
41+
bb.f3:
42+
call void @f3()
43+
br label %exit
44+
45+
bb.f4:
46+
call void @f4()
47+
br label %exit
48+
49+
exit:
50+
ret void
51+
}
52+
53+
declare void @f1()
54+
55+
declare void @f2()
56+
57+
declare void @f3()
58+
59+
declare void @f4()
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
; RUN: opt < %s -jump-threading -S -verify | FileCheck %s
2+
3+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
4+
target triple = "x86_64-unknown-linux-gnu"
5+
6+
define void @foo(i32 %cond1, i32 %cond2) {
7+
; CHECK-LABEL: @foo
8+
; CHECK-LABEL: entry
9+
entry:
10+
%tobool = icmp ne i32 %cond1, 0
11+
br i1 %tobool, label %bb.f1, label %bb.f2
12+
13+
bb.f1:
14+
call void @f1()
15+
br label %bb.cond2
16+
; Verify that we branch on cond2 without checking tobool again.
17+
; CHECK: call void @f1()
18+
; CHECK-NEXT: icmp eq i32 %cond2, 0
19+
; CHECK-NEXT: label %exit, label %bb.f3
20+
21+
bb.f2:
22+
call void @f2()
23+
br label %bb.cond2
24+
; Verify that we branch on cond2 without checking tobool again.
25+
; CHECK: call void @f2()
26+
; CHECK-NEXT: icmp eq i32 %cond2, 0
27+
; CHECK-NEXT: label %exit, label %bb.f4
28+
29+
bb.cond2:
30+
%tobool1 = icmp eq i32 %cond2, 0
31+
br i1 %tobool1, label %exit, label %bb.cond1again
32+
33+
; Verify that we eliminate this basic block.
34+
; CHECK-NOT: bb.cond1again:
35+
bb.cond1again:
36+
br i1 %tobool, label %bb.f3, label %bb.f4
37+
38+
bb.f3:
39+
call void @f3()
40+
br label %exit
41+
42+
bb.f4:
43+
call void @f4()
44+
br label %exit
45+
46+
exit:
47+
ret void
48+
}
49+
50+
declare void @f1() local_unnamed_addr
51+
52+
declare void @f2() local_unnamed_addr
53+
54+
declare void @f3() local_unnamed_addr
55+
56+
declare void @f4() local_unnamed_addr

0 commit comments

Comments
 (0)