Skip to content

Commit d4097b4

Browse files
author
Yevgeny Rouban
committed
[SimpleLoopUnswitch] Implement handling of prof branch_weights metadata for SwitchInst
Differential Revision: https://reviews.llvm.org/D60606
llvm-svn: 364734
1 parent 9872269 commit d4097b4

File tree

3 files changed

+301
-17
lines changed

3 files changed

+301
-17
lines changed

llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -594,11 +594,13 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
594594
ExitCaseIndices.push_back(Case.getCaseIndex());
595595
}
596596
BasicBlock *DefaultExitBB = nullptr;
597+
SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight =
598+
SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0);
597599
if (!L.contains(SI.getDefaultDest()) &&
598600
areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) &&
599-
!isa<UnreachableInst>(SI.getDefaultDest()->getTerminator()))
601+
!isa<UnreachableInst>(SI.getDefaultDest()->getTerminator())) {
600602
DefaultExitBB = SI.getDefaultDest();
601-
else if (ExitCaseIndices.empty())
603+
} else if (ExitCaseIndices.empty())
602604
return false;
603605

604606
LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
@@ -622,8 +624,11 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
622624

623625
// Store the exit cases into a separate data structure and remove them from
624626
// the switch.
625-
SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases;
627+
SmallVector<std::tuple<ConstantInt *, BasicBlock *,
628+
SwitchInstProfUpdateWrapper::CaseWeightOpt>,
629+
4> ExitCases;
626630
ExitCases.reserve(ExitCaseIndices.size());
631+
SwitchInstProfUpdateWrapper SIW(SI);
627632
// We walk the case indices backwards so that we remove the last case first
628633
// and don't disrupt the earlier indices.
629634
for (unsigned Index : reverse(ExitCaseIndices)) {
@@ -633,9 +638,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
633638
if (!ExitL || ExitL->contains(OuterL))
634639
OuterL = ExitL;
635640
// Save the value of this case.
636-
ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()});
641+
auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
642+
ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
637643
// Delete the unswitched cases.
638-
SI.removeCase(CaseI);
644+
SIW.removeCase(CaseI);
639645
}
640646

641647
if (SE) {
@@ -673,6 +679,7 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
673679

674680
// Now add the unswitched switch.
675681
auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
682+
SwitchInstProfUpdateWrapper NewSIW(*NewSI);
676683

677684
// Rewrite the IR for the unswitched basic blocks. This requires two steps.
678685
// First, we split any exit blocks with remaining in-loop predecessors. Then
@@ -700,9 +707,9 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
700707
}
701708
// Note that we must use a reference in the for loop so that we update the
702709
// container.
703-
for (auto &CasePair : reverse(ExitCases)) {
710+
for (auto &ExitCase : reverse(ExitCases)) {
704711
// Grab the exit block stored in the case tuple; the tuple entry is re-pointed below if the block gets split.
705-
BasicBlock *ExitBB = CasePair.second;
712+
BasicBlock *ExitBB = std::get<1>(ExitCase);
706713

707714
// If this case is the last edge into the exit block, we can simply reuse it
708715
// as it will no longer be a loop exit. No mapping necessary.
@@ -724,27 +731,39 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
724731
/*FullUnswitch*/ true);
725732
}
726733
// Update the case tuple to point to the split block.
727-
CasePair.second = SplitExitBB;
734+
std::get<1>(ExitCase) = SplitExitBB;
728735
}
729736

730737
// Now add the unswitched cases. We do this in reverse order as we built them
731738
// in reverse order.
732-
for (auto CasePair : reverse(ExitCases)) {
733-
ConstantInt *CaseVal = CasePair.first;
734-
BasicBlock *UnswitchedBB = CasePair.second;
739+
for (auto &ExitCase : reverse(ExitCases)) {
740+
ConstantInt *CaseVal = std::get<0>(ExitCase);
741+
BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
735742

736-
NewSI->addCase(CaseVal, UnswitchedBB);
743+
NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
737744
}
738745

739746
// If the default was unswitched, re-point it and add explicit cases for
740747
// entering the loop.
741748
if (DefaultExitBB) {
742-
NewSI->setDefaultDest(DefaultExitBB);
749+
NewSIW->setDefaultDest(DefaultExitBB);
750+
NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
743751

744752
// We removed all the exit cases, so we just copy the cases to the
745753
// unswitched switch.
746-
for (auto Case : SI.cases())
747-
NewSI->addCase(Case.getCaseValue(), NewPH);
754+
for (const auto &Case : SI.cases())
755+
NewSIW.addCase(Case.getCaseValue(), NewPH,
756+
SIW.getSuccessorWeight(Case.getSuccessorIndex()));
757+
} else if (DefaultCaseWeight) {
758+
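// The default was not unswitched, so the new switch's default edge gets the original default weight plus the weights of all cases left in the original switch.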
759+
uint64_t SW = *DefaultCaseWeight;
760+
for (const auto &Case : SI.cases()) {
761+
auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
762+
assert(W &&
763+
"case weight must be defined as default case weight is defined");
764+
SW += *W;
765+
}
766+
NewSIW.setSuccessorWeight(0, SW);
748767
}
749768

750769
// If we ended up with a common successor for every path through the switch
@@ -769,7 +788,7 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
769788
/*KeepOneInputPHIs*/ true);
770789
}
771790
// Now nuke the switch and replace it with a direct branch.
772-
SI.eraseFromParent();
791+
SIW.eraseFromParent();
773792
BranchInst::Create(CommonSuccBB, BB);
774793
} else if (DefaultExitBB) {
775794
assert(SI.getNumCases() > 0 &&
@@ -779,8 +798,11 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
779798
// being simple and keeping the number of edges from this switch to
780799
// successors the same, and avoiding any PHI update complexity.
781800
auto LastCaseI = std::prev(SI.case_end());
801+
782802
SI.setDefaultDest(LastCaseI->getCaseSuccessor());
783-
SI.removeCase(LastCaseI);
803+
SIW.setSuccessorWeight(
804+
0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
805+
SIW.removeCase(LastCaseI);
784806
}
785807

786808
// Walk the unswitched exit blocks and the unswitched split blocks and update
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; RUN: opt -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
2+
; RUN: opt -enable-mssa-loop-dependency=true -verify-memoryssa -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
3+
4+
declare void @incf()
5+
declare void @decf()
6+
7+
define i32 @test2(i32 %c) {
8+
; CHECK-LABEL: @test2(
9+
br label %loop_begin
10+
11+
; CHECK: !prof ![[MD0:[0-9]+]]
12+
; CHECK: loop_begin:
13+
; CHECK: !prof ![[MD1:[0-9]+]]
14+
loop_begin:
15+
16+
switch i32 %c, label %default [
17+
i32 1, label %inc
18+
i32 2, label %dec
19+
], !prof !{!"branch_weights", i32 99, i32 1, i32 2}
20+
21+
inc:
22+
call void @incf()
23+
br label %loop_begin
24+
25+
dec:
26+
call void @decf()
27+
br label %loop_begin
28+
29+
default:
30+
ret i32 0
31+
}
32+
33+
; CHECK: ![[MD0]] = !{!"branch_weights", i32 99, i32 1, i32 2}
34+
; CHECK: ![[MD1]] = !{!"branch_weights", i32 2, i32 1}
Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; then metadata checks MDn were added manually.
3+
; RUN: opt -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
4+
; RUN: opt -enable-mssa-loop-dependency=true -verify-memoryssa -passes='loop(unswitch),verify<loops>' -S < %s | FileCheck %s
5+
6+
declare void @some_func()
7+
8+
; Test for a trivially unswitchable switch with non-default case exiting.
9+
define i32 @test2(i32* %var, i32 %cond1, i32 %cond2) {
10+
; CHECK-LABEL: @test2(
11+
; CHECK-NEXT: entry:
12+
; CHECK-NEXT: switch i32 [[COND2:%.*]], label [[ENTRY_SPLIT:%.*]] [
13+
; CHECK-NEXT: i32 2, label [[LOOP_EXIT2:%.*]]
14+
; CHECK-NEXT: ], !prof ![[MD0:[0-9]+]]
15+
; CHECK: entry.split:
16+
; CHECK-NEXT: br label [[LOOP_BEGIN:%.*]]
17+
; CHECK: loop_begin:
18+
; CHECK-NEXT: [[VAR_VAL:%.*]] = load i32, i32* [[VAR:%.*]]
19+
; CHECK-NEXT: switch i32 [[COND2]], label [[LOOP2:%.*]] [
20+
; CHECK-NEXT: i32 0, label [[LOOP0:%.*]]
21+
; CHECK-NEXT: i32 1, label [[LOOP1:%.*]]
22+
; CHECK-NEXT: ], !prof ![[MD1:[0-9]+]]
23+
; CHECK: loop0:
24+
; CHECK-NEXT: call void @some_func()
25+
; CHECK-NEXT: br label [[LOOP_LATCH:%.*]]
26+
; CHECK: loop1:
27+
; CHECK-NEXT: call void @some_func()
28+
; CHECK-NEXT: br label [[LOOP_LATCH]]
29+
; CHECK: loop2:
30+
; CHECK-NEXT: call void @some_func()
31+
; CHECK-NEXT: br label [[LOOP_LATCH]]
32+
; CHECK: loop_latch:
33+
; CHECK-NEXT: br label [[LOOP_BEGIN]]
34+
; CHECK: loop_exit1:
35+
; CHECK-NEXT: ret i32 0
36+
; CHECK: loop_exit2:
37+
; CHECK-NEXT: ret i32 0
38+
; CHECK: loop_exit3:
39+
; CHECK-NEXT: ret i32 0
40+
;
41+
entry:
42+
br label %loop_begin
43+
44+
loop_begin:
45+
%var_val = load i32, i32* %var
46+
switch i32 %cond2, label %loop2 [
47+
i32 0, label %loop0
48+
i32 1, label %loop1
49+
i32 2, label %loop_exit2
50+
], !prof !{!"branch_weights", i32 99, i32 100, i32 101, i32 102}
51+
52+
loop0:
53+
call void @some_func()
54+
br label %loop_latch
55+
56+
loop1:
57+
call void @some_func()
58+
br label %loop_latch
59+
60+
loop2:
61+
call void @some_func()
62+
br label %loop_latch
63+
64+
loop_latch:
65+
br label %loop_begin
66+
67+
loop_exit1:
68+
ret i32 0
69+
70+
loop_exit2:
71+
ret i32 0
72+
73+
loop_exit3:
74+
ret i32 0
75+
}
76+
77+
; Test for a trivially unswitchable switch with only the default case exiting.
78+
define i32 @test3(i32* %var, i32 %cond1, i32 %cond2) {
79+
; CHECK-LABEL: @test3(
80+
; CHECK-NEXT: entry:
81+
; CHECK-NEXT: switch i32 [[COND2:%.*]], label [[LOOP_EXIT2:%.*]] [
82+
; CHECK-NEXT: i32 0, label [[ENTRY_SPLIT:%.*]]
83+
; CHECK-NEXT: i32 1, label [[ENTRY_SPLIT]]
84+
; CHECK-NEXT: i32 2, label [[ENTRY_SPLIT]]
85+
; CHECK-NEXT: ], !prof ![[MD2:[0-9]+]]
86+
; CHECK: entry.split:
87+
; CHECK-NEXT: br label [[LOOP_BEGIN:%.*]]
88+
; CHECK: loop_begin:
89+
; CHECK-NEXT: [[VAR_VAL:%.*]] = load i32, i32* [[VAR:%.*]]
90+
; CHECK-NEXT: switch i32 [[COND2]], label [[LOOP2:%.*]] [
91+
; CHECK-NEXT: i32 0, label [[LOOP0:%.*]]
92+
; CHECK-NEXT: i32 1, label [[LOOP1:%.*]]
93+
; CHECK-NEXT: ], !prof ![[MD3:[0-9]+]]
94+
; CHECK: loop0:
95+
; CHECK-NEXT: call void @some_func()
96+
; CHECK-NEXT: br label [[LOOP_LATCH:%.*]]
97+
; CHECK: loop1:
98+
; CHECK-NEXT: call void @some_func()
99+
; CHECK-NEXT: br label [[LOOP_LATCH]]
100+
; CHECK: loop2:
101+
; CHECK-NEXT: call void @some_func()
102+
; CHECK-NEXT: br label [[LOOP_LATCH]]
103+
; CHECK: loop_latch:
104+
; CHECK-NEXT: br label [[LOOP_BEGIN]]
105+
; CHECK: loop_exit1:
106+
; CHECK-NEXT: ret i32 0
107+
; CHECK: loop_exit2:
108+
; CHECK-NEXT: ret i32 0
109+
; CHECK: loop_exit3:
110+
; CHECK-NEXT: ret i32 0
111+
;
112+
entry:
113+
br label %loop_begin
114+
115+
loop_begin:
116+
%var_val = load i32, i32* %var
117+
switch i32 %cond2, label %loop_exit2 [
118+
i32 0, label %loop0
119+
i32 1, label %loop1
120+
i32 2, label %loop2
121+
], !prof !{!"branch_weights", i32 99, i32 100, i32 101, i32 102}
122+
123+
loop0:
124+
call void @some_func()
125+
br label %loop_latch
126+
127+
loop1:
128+
call void @some_func()
129+
br label %loop_latch
130+
131+
loop2:
132+
call void @some_func()
133+
br label %loop_latch
134+
135+
loop_latch:
136+
br label %loop_begin
137+
138+
loop_exit1:
139+
ret i32 0
140+
141+
loop_exit2:
142+
ret i32 0
143+
144+
loop_exit3:
145+
ret i32 0
146+
}
147+
148+
; Test for a trivially unswitchable switch with multiple exiting cases and
149+
; multiple looping cases.
150+
define i32 @test4(i32* %var, i32 %cond1, i32 %cond2) {
151+
; CHECK-LABEL: @test4(
152+
; CHECK-NEXT: entry:
153+
; CHECK-NEXT: switch i32 [[COND2:%.*]], label [[LOOP_EXIT2:%.*]] [
154+
; CHECK-NEXT: i32 13, label [[LOOP_EXIT1:%.*]]
155+
; CHECK-NEXT: i32 42, label [[LOOP_EXIT3:%.*]]
156+
; CHECK-NEXT: i32 0, label [[ENTRY_SPLIT:%.*]]
157+
; CHECK-NEXT: i32 1, label [[ENTRY_SPLIT]]
158+
; CHECK-NEXT: i32 2, label [[ENTRY_SPLIT]]
159+
; CHECK-NEXT: ], !prof ![[MD4:[0-9]+]]
160+
; CHECK: entry.split:
161+
; CHECK-NEXT: br label [[LOOP_BEGIN:%.*]]
162+
; CHECK: loop_begin:
163+
; CHECK-NEXT: [[VAR_VAL:%.*]] = load i32, i32* [[VAR:%.*]]
164+
; CHECK-NEXT: switch i32 [[COND2]], label [[LOOP2:%.*]] [
165+
; CHECK-NEXT: i32 0, label [[LOOP0:%.*]]
166+
; CHECK-NEXT: i32 1, label [[LOOP1:%.*]]
167+
; CHECK-NEXT: ], !prof ![[MD3:[0-9]+]]
168+
; CHECK: loop0:
169+
; CHECK-NEXT: call void @some_func()
170+
; CHECK-NEXT: br label [[LOOP_LATCH:%.*]]
171+
; CHECK: loop1:
172+
; CHECK-NEXT: call void @some_func()
173+
; CHECK-NEXT: br label [[LOOP_LATCH]]
174+
; CHECK: loop2:
175+
; CHECK-NEXT: call void @some_func()
176+
; CHECK-NEXT: br label [[LOOP_LATCH]]
177+
; CHECK: loop_latch:
178+
; CHECK-NEXT: br label [[LOOP_BEGIN]]
179+
; CHECK: loop_exit1:
180+
; CHECK-NEXT: ret i32 0
181+
; CHECK: loop_exit2:
182+
; CHECK-NEXT: ret i32 0
183+
; CHECK: loop_exit3:
184+
; CHECK-NEXT: ret i32 0
185+
;
186+
entry:
187+
br label %loop_begin
188+
189+
loop_begin:
190+
%var_val = load i32, i32* %var
191+
switch i32 %cond2, label %loop_exit2 [
192+
i32 0, label %loop0
193+
i32 1, label %loop1
194+
i32 13, label %loop_exit1
195+
i32 2, label %loop2
196+
i32 42, label %loop_exit3
197+
], !prof !{!"branch_weights", i32 99, i32 100, i32 101, i32 113, i32 102, i32 142}
198+
199+
loop0:
200+
call void @some_func()
201+
br label %loop_latch
202+
203+
loop1:
204+
call void @some_func()
205+
br label %loop_latch
206+
207+
loop2:
208+
call void @some_func()
209+
br label %loop_latch
210+
211+
loop_latch:
212+
br label %loop_begin
213+
214+
loop_exit1:
215+
ret i32 0
216+
217+
loop_exit2:
218+
ret i32 0
219+
220+
loop_exit3:
221+
ret i32 0
222+
}
223+
224+
; CHECK: ![[MD0]] = !{!"branch_weights", i32 300, i32 102}
225+
; CHECK: ![[MD1]] = !{!"branch_weights", i32 99, i32 100, i32 101}
226+
; CHECK: ![[MD2]] = !{!"branch_weights", i32 99, i32 100, i32 101, i32 102}
227+
; CHECK: ![[MD3]] = !{!"branch_weights", i32 102, i32 100, i32 101}
228+
; CHECK: ![[MD4]] = !{!"branch_weights", i32 99, i32 113, i32 142, i32 100, i32 101, i32 102}

0 commit comments

Comments
 (0)