Skip to content

Commit c9325f8

Browse files
authored
[DFAJumpThreading] Add an early exit heuristic for unpredictable values (#85015)
Right now the algorithm does not exit on unpredictable values. It waits until all the paths have been enumerated to see if any of those paths have that value. Waiting this late leads to a lot of wasteful computation and higher compile time. In this patch I have added a heuristic that checks whether the value comes from the same inner loops as the switch; if so, it is likely that the value will also be seen on a threadable path and that the code in `getStateDefMap()` will return an empty map. I tested this on the llvm test suite and the only change in the number of threaded switches was in 7zip (before 23, after 18). In all of those cases the current algorithm was partially threading the loop because it was hitting a limit on the number of paths to be explored. On increasing this limit even the current algorithm finds paths where the unpredictable value is seen. Compile time (with the pass enabled by default and this patch): https://llvm-compile-time-tracker.com/compare.php?from=8c5e9cf737138aba22a4a8f64ef2c5efc80dd7f9&to=42c75d888058b35c6d15901b34e36251d8f766b9&stat=instructions:u
1 parent f2794cc commit c9325f8

File tree

3 files changed

+156
-11
lines changed

3 files changed

+156
-11
lines changed

llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,11 @@ static cl::opt<bool>
9696
cl::desc("View the CFG before DFA Jump Threading"),
9797
cl::Hidden, cl::init(false));
9898

99+
static cl::opt<bool> EarlyExitHeuristic(
100+
"dfa-early-exit-heuristic",
101+
cl::desc("Exit early if an unpredictable value come from the same loop"),
102+
cl::Hidden, cl::init(true));
103+
99104
static cl::opt<unsigned> MaxPathLength(
100105
"dfa-max-path-length",
101106
cl::desc("Max number of blocks searched to find a threading path"),
@@ -405,7 +410,7 @@ struct MainSwitch {
405410
///
406411
/// Also, collect select instructions to unfold.
407412
bool isCandidate(const SwitchInst *SI) {
408-
std::deque<Value *> Q;
413+
std::deque<std::pair<Value *, BasicBlock *>> Q;
409414
SmallSet<Value *, 16> SeenValues;
410415
SelectInsts.clear();
411416

@@ -415,25 +420,28 @@ struct MainSwitch {
415420
return false;
416421

417422
// The switch must be in a loop.
418-
if (!LI->getLoopFor(SI->getParent()))
423+
const Loop *L = LI->getLoopFor(SI->getParent());
424+
if (!L)
419425
return false;
420426

421-
addToQueue(SICond, Q, SeenValues);
427+
addToQueue(SICond, nullptr, Q, SeenValues);
422428

423429
while (!Q.empty()) {
424-
Value *Current = Q.front();
430+
Value *Current = Q.front().first;
431+
BasicBlock *CurrentIncomingBB = Q.front().second;
425432
Q.pop_front();
426433

427434
if (auto *Phi = dyn_cast<PHINode>(Current)) {
428-
for (Value *Incoming : Phi->incoming_values()) {
429-
addToQueue(Incoming, Q, SeenValues);
435+
for (BasicBlock *IncomingBB : Phi->blocks()) {
436+
Value *Incoming = Phi->getIncomingValueForBlock(IncomingBB);
437+
addToQueue(Incoming, IncomingBB, Q, SeenValues);
430438
}
431439
LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n");
432440
} else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) {
433441
if (!isValidSelectInst(SelI))
434442
return false;
435-
addToQueue(SelI->getTrueValue(), Q, SeenValues);
436-
addToQueue(SelI->getFalseValue(), Q, SeenValues);
443+
addToQueue(SelI->getTrueValue(), CurrentIncomingBB, Q, SeenValues);
444+
addToQueue(SelI->getFalseValue(), CurrentIncomingBB, Q, SeenValues);
437445
LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n");
438446
if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back()))
439447
SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse));
@@ -446,18 +454,31 @@ struct MainSwitch {
446454
// initial switch values that can be ignored (they will hit the
447455
// unthreaded switch) but this assumption will get checked later after
448456
// paths have been enumerated (in function getStateDefMap).
457+
458+
// If the unpredictable value comes from the same inner loop it is
459+
// likely that it will also be on the enumerated paths, causing us to
460+
// exit after we have enumerated all the paths. This heuristic saves
461+
// compile time because a search for all the paths can become expensive.
462+
if (EarlyExitHeuristic &&
463+
L->contains(LI->getLoopFor(CurrentIncomingBB))) {
464+
LLVM_DEBUG(dbgs()
465+
<< "\tExiting early due to unpredictability heuristic.\n");
466+
return false;
467+
}
468+
449469
continue;
450470
}
451471
}
452472

453473
return true;
454474
}
455475

456-
void addToQueue(Value *Val, std::deque<Value *> &Q,
476+
void addToQueue(Value *Val, BasicBlock *BB,
477+
std::deque<std::pair<Value *, BasicBlock *>> &Q,
457478
SmallSet<Value *, 16> &SeenValues) {
458479
if (SeenValues.contains(Val))
459480
return;
460-
Q.push_back(Val);
481+
Q.push_back({Val, BB});
461482
SeenValues.insert(Val);
462483
}
463484

llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s
2+
; RUN: opt -S -passes=dfa-jump-threading -dfa-early-exit-heuristic=false %s | FileCheck %s
33

44
; These tests check if selects are unfolded properly for jump threading
55
; opportunities. There are three different patterns to consider:
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
; REQUIRES: asserts
2+
; RUN: opt -S -passes=dfa-jump-threading %s -debug-only=dfa-jump-threading 2>&1 | FileCheck %s
3+
4+
; CHECK-COUNT-3: Exiting early due to unpredictability heuristic.
5+
6+
@.str.1 = private unnamed_addr constant [3 x i8] c"10\00", align 1
7+
@.str.2 = private unnamed_addr constant [3 x i8] c"30\00", align 1
8+
@.str.3 = private unnamed_addr constant [3 x i8] c"20\00", align 1
9+
@.str.4 = private unnamed_addr constant [3 x i8] c"40\00", align 1
10+
11+
define void @test1(i32 noundef %num, i32 noundef %num2) {
12+
entry:
13+
br label %while.body
14+
15+
while.body: ; preds = %entry, %sw.epilog
16+
%num.addr.0 = phi i32 [ %num, %entry ], [ %num.addr.1, %sw.epilog ]
17+
switch i32 %num.addr.0, label %sw.default [
18+
i32 10, label %sw.bb
19+
i32 30, label %sw.bb1
20+
i32 20, label %sw.bb2
21+
i32 40, label %sw.bb3
22+
]
23+
24+
sw.bb: ; preds = %while.body
25+
%call.i = tail call i32 @bar(ptr noundef nonnull @.str.1)
26+
br label %sw.epilog
27+
28+
sw.bb1: ; preds = %while.body
29+
%call.i4 = tail call i32 @bar(ptr noundef nonnull @.str.2)
30+
br label %sw.epilog
31+
32+
sw.bb2: ; preds = %while.body
33+
%call.i5 = tail call i32 @bar(ptr noundef nonnull @.str.3)
34+
br label %sw.epilog
35+
36+
sw.bb3: ; preds = %while.body
37+
%call.i6 = tail call i32 @bar(ptr noundef nonnull @.str.4)
38+
%call = tail call noundef i32 @foo()
39+
%add = add nsw i32 %call, %num2
40+
br label %sw.epilog
41+
42+
sw.default: ; preds = %while.body
43+
ret void
44+
45+
sw.epilog: ; preds = %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
46+
%num.addr.1 = phi i32 [ %add, %sw.bb3 ], [ 40, %sw.bb2 ], [ 20, %sw.bb1 ], [ 30, %sw.bb ]
47+
br label %while.body
48+
}
49+
50+
51+
define void @test2(i32 noundef %num, i32 noundef %num2) {
52+
entry:
53+
br label %while.body
54+
55+
while.body: ; preds = %entry, %sw.epilog
56+
%num.addr.0 = phi i32 [ %num, %entry ], [ %num.addr.1, %sw.epilog ]
57+
switch i32 %num.addr.0, label %sw.default [
58+
i32 10, label %sw.epilog
59+
i32 30, label %sw.bb1
60+
i32 20, label %sw.bb2
61+
i32 40, label %sw.bb3
62+
]
63+
64+
sw.bb1: ; preds = %while.body
65+
br label %sw.epilog
66+
67+
sw.bb2: ; preds = %while.body
68+
br label %sw.epilog
69+
70+
sw.bb3: ; preds = %while.body
71+
br label %sw.epilog
72+
73+
sw.default: ; preds = %while.body
74+
ret void
75+
76+
sw.epilog: ; preds = %while.body, %sw.bb3, %sw.bb2, %sw.bb1
77+
%.str.4.sink = phi ptr [ @.str.4, %sw.bb3 ], [ @.str.3, %sw.bb2 ], [ @.str.2, %sw.bb1 ], [ @.str.1, %while.body ]
78+
%num.addr.1 = phi i32 [ %num2, %sw.bb3 ], [ 40, %sw.bb2 ], [ 20, %sw.bb1 ], [ 30, %while.body ]
79+
%call.i6 = tail call i32 @bar(ptr noundef nonnull %.str.4.sink)
80+
br label %while.body
81+
}
82+
83+
84+
define void @test3(i32 noundef %num, i32 noundef %num2) {
85+
entry:
86+
%add = add nsw i32 %num2, 40
87+
br label %while.body
88+
89+
while.body: ; preds = %entry, %sw.epilog
90+
%num.addr.0 = phi i32 [ %num, %entry ], [ %num.addr.1, %sw.epilog ]
91+
switch i32 %num.addr.0, label %sw.default [
92+
i32 10, label %sw.bb
93+
i32 30, label %sw.bb1
94+
i32 20, label %sw.bb2
95+
i32 40, label %sw.bb3
96+
]
97+
98+
sw.bb: ; preds = %while.body
99+
%call.i = tail call i32 @bar(ptr noundef nonnull @.str.1)
100+
br label %sw.epilog
101+
102+
sw.bb1: ; preds = %while.body
103+
%call.i5 = tail call i32 @bar(ptr noundef nonnull @.str.2)
104+
br label %sw.epilog
105+
106+
sw.bb2: ; preds = %while.body
107+
%call.i6 = tail call i32 @bar(ptr noundef nonnull @.str.3)
108+
br label %sw.epilog
109+
110+
sw.bb3: ; preds = %while.body
111+
%call.i7 = tail call i32 @bar(ptr noundef nonnull @.str.4)
112+
br label %sw.epilog
113+
114+
sw.default: ; preds = %while.body
115+
ret void
116+
117+
sw.epilog: ; preds = %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
118+
%num.addr.1 = phi i32 [ %add, %sw.bb3 ], [ 40, %sw.bb2 ], [ 20, %sw.bb1 ], [ 30, %sw.bb ]
119+
br label %while.body
120+
}
121+
122+
123+
declare noundef i32 @foo()
124+
declare noundef i32 @bar(ptr nocapture noundef readonly)

0 commit comments

Comments
 (0)