Skip to content

Commit 7e7aad1

Browse files
committed
[WebAssembly] Optimize the number of routing blocks in FixIrreducibleCFG
Summary: Currently we create a routing block to the dispatch block for every predecessor of every entry. So the total number of routing blocks created will be (# of preds) * (# of entries). But we don't need to do this: we need at most 2 routing blocks per loop entry, one for when the predecessor is inside the loop and one for it is outside the loop. (We can't merge these into one because this will creates another loop cycle between blocks inside and blocks outside) This patch fixes this and creates at most 2 routing blocks per entry. This also renames variable `Split` to `Routing`, which I think is a bit clearer. Reviewers: kripken Subscribers: sunfish, dschuff, sbc100, jgravelle-google, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59462 llvm-svn: 357337
1 parent 916709e commit 7e7aad1

File tree

3 files changed

+153
-18
lines changed

3 files changed

+153
-18
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp

Lines changed: 62 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
230230
MachineFunction &MF);
231231

232232
void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
233-
MachineFunction &MF);
233+
MachineFunction &MF, const ReachabilityGraph &Graph);
234234

235235
public:
236236
static char ID; // Pass identification, replacement for typeid
@@ -279,7 +279,7 @@ bool WebAssemblyFixIrreducibleControlFlow::processRegion(
279279
}
280280

281281
if (MutualLoopEntries.size() > 1) {
282-
makeSingleEntryLoop(MutualLoopEntries, Blocks, MF);
282+
makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph);
283283
FoundIrreducibility = true;
284284
Changed = true;
285285
break;
@@ -315,9 +315,12 @@ bool WebAssemblyFixIrreducibleControlFlow::processRegion(
315315
// Given a set of entries to a single loop, create a single entry for that
316316
// loop by creating a dispatch block for them, routing control flow using
317317
// a helper variable. Also updates Blocks with any new blocks created, so
318-
// that we properly track all the blocks in the region.
318+
// that we properly track all the blocks in the region. But this does not update
319+
// ReachabilityGraph; this will be updated in the caller of this function as
320+
// needed.
319321
void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
320-
BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF) {
322+
BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,
323+
const ReachabilityGraph &Graph) {
321324
assert(Entries.size() >= 2);
322325

323326
// Sort the entries to ensure a deterministic build.
@@ -385,36 +388,78 @@ void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
385388
}
386389
}
387390

388-
for (MachineBasicBlock *Pred : AllPreds) {
389-
DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map;
391+
// This set stores predecessors within this loop.
392+
DenseSet<MachineBasicBlock *> InLoop;
393+
for (auto *Pred : AllPreds) {
390394
for (auto *Entry : Pred->successors()) {
391-
if (!Entries.count(Entry)) {
395+
if (!Entries.count(Entry))
392396
continue;
397+
if (Graph.canReach(Entry, Pred)) {
398+
InLoop.insert(Pred);
399+
break;
393400
}
401+
}
402+
}
403+
404+
// Record if each entry has a layout predecessor. This map stores
405+
// <<Predecessor is within the loop?, loop entry>, layout predecessor>
406+
std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *>
407+
EntryToLayoutPred;
408+
for (auto *Pred : AllPreds)
409+
for (auto *Entry : Pred->successors())
410+
if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry))
411+
EntryToLayoutPred[std::make_pair(InLoop.count(Pred), Entry)] = Pred;
412+
413+
// We need to create at most two routing blocks per entry: one for
414+
// predecessors outside the loop and one for predecessors inside the loop.
415+
// This map stores
416+
// <<Predecessor is within the loop?, loop entry>, routing block>
417+
std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *> Map;
418+
for (auto *Pred : AllPreds) {
419+
bool PredInLoop = InLoop.count(Pred);
420+
for (auto *Entry : Pred->successors()) {
421+
if (!Entries.count(Entry) ||
422+
Map.count(std::make_pair(InLoop.count(Pred), Entry)))
423+
continue;
424+
// If there exists a layout predecessor of this entry and this predecessor
425+
// is not that, we rather create a routing block after that layout
426+
// predecessor to save a branch.
427+
if (EntryToLayoutPred.count(std::make_pair(PredInLoop, Entry)) &&
428+
EntryToLayoutPred[std::make_pair(PredInLoop, Entry)] != Pred)
429+
continue;
394430

395431
// This is a successor we need to rewrite.
396-
MachineBasicBlock *Split = MF.CreateMachineBasicBlock();
432+
MachineBasicBlock *Routing = MF.CreateMachineBasicBlock();
397433
MF.insert(Pred->isLayoutSuccessor(Entry)
398434
? MachineFunction::iterator(Entry)
399435
: MF.end(),
400-
Split);
401-
Blocks.insert(Split);
436+
Routing);
437+
Blocks.insert(Routing);
402438

403439
// Set the jump table's register of the index of the block we wish to
404440
// jump to, and jump to the jump table.
405-
BuildMI(Split, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
441+
BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
406442
.addImm(Indices[Entry]);
407-
BuildMI(Split, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
408-
Split->addSuccessor(Dispatch);
409-
Map[Entry] = Split;
443+
BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
444+
Routing->addSuccessor(Dispatch);
445+
Map[std::make_pair(PredInLoop, Entry)] = Routing;
410446
}
447+
}
448+
449+
for (auto *Pred : AllPreds) {
450+
bool PredInLoop = InLoop.count(Pred);
411451
// Remap the terminator operands and the successor list.
412452
for (MachineInstr &Term : Pred->terminators())
413453
for (auto &Op : Term.explicit_uses())
414454
if (Op.isMBB() && Indices.count(Op.getMBB()))
415-
Op.setMBB(Map[Op.getMBB()]);
416-
for (auto Rewrite : Map)
417-
Pred->replaceSuccessor(Rewrite.first, Rewrite.second);
455+
Op.setMBB(Map[std::make_pair(PredInLoop, Op.getMBB())]);
456+
457+
for (auto *Succ : Pred->successors()) {
458+
if (!Entries.count(Succ))
459+
continue;
460+
auto *Routing = Map[std::make_pair(PredInLoop, Succ)];
461+
Pred->replaceSuccessor(Succ, Routing);
462+
}
418463
}
419464

420465
// Create a fake default label, because br_table requires one.

llvm/test/CodeGen/WebAssembly/irreducible-cfg.ll

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,12 +93,18 @@ bb19: ; preds = %bb6
9393
ret void
9494
}
9595

96-
; A simple loop 2 blocks that are both entries.
96+
; A simple loop 2 blocks that are both entries: A1 and A2.
97+
; Even though A1 and A2 both have 3 predecessors (A0, A1, and A2), not 6 but
98+
; only 4 new routing blocks to the dispatch block should be generated.
9799

98100
; CHECK-LABEL: test2:
99101
; CHECK: br_if
100102
; CHECK: i32.const $[[REG:[^,]+]]=
103+
; CHECK: i32.const $[[REG]]=
101104
; CHECK: br_table $[[REG]],
105+
; CHECK: i32.const $[[REG]]=
106+
; CHECK: i32.const $[[REG]]=
107+
; CHECK-NOT: i32.const $[[REG]]=
102108
define i32 @test2(i32) {
103109
entry:
104110
br label %A0
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# RUN: llc -mtriple=wasm32-unknown-unknown -run-pass wasm-fix-irreducible-control-flow %s -o - | FileCheck %s
2+
3+
# This tests if we correctly create at most 2 routing blocks per entry block,
4+
# and also whether those routing blocks are generated in the correct place. If
5+
# one of the predecessor is the layout predecessor of an entry, a routing block
6+
# for the entry should be generated right after the layout predecessor.
7+
8+
--- |
9+
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
10+
target triple = "wasm32-unknown-unknown"
11+
12+
define void @test0() {
13+
pred0:
14+
ret void
15+
pred1:
16+
ret void
17+
entry0:
18+
ret void
19+
entry1:
20+
ret void
21+
}
22+
...
23+
24+
---
25+
# CHECK-LABEL: test0
26+
name: test0
27+
liveins:
28+
- { reg: '$arguments' }
29+
body: |
30+
bb.0.pred0:
31+
successors: %bb.1, %bb.2
32+
liveins: $arguments
33+
%0:i32 = CONST_I32 100, implicit-def $arguments
34+
BR_IF %bb.2, %0:i32, implicit-def $arguments
35+
; CHECK: bb.0.pred0:
36+
; CHECK: BR_IF %bb.2, %0, implicit-def $arguments
37+
38+
bb.1.pred1:
39+
; predecessors: %bb.1
40+
successors: %bb.2, %bb.3
41+
BR_IF %bb.3, %0:i32, implicit-def $arguments
42+
; CHECK: bb.1.pred1:
43+
; CHECK: BR_IF %bb.7, %0, implicit-def $arguments
44+
; This falls through to bb.2, so we don't need an additional BR here
45+
; CHECK-NOT: BR
46+
47+
; Routing block for entry0, when predecessor is outside the loop
48+
; This routing block is shared between the two predecessors: pred0 and pred1.
49+
; CHECK: bb.2:
50+
; CHECK: %1:i32 = CONST_I32 0, implicit-def $arguments
51+
; CHECK: BR %bb.6, implicit-def $arguments
52+
53+
bb.2.entry0:
54+
; predecessors: %bb.0, %bb.1, %bb.1
55+
successors: %bb.3
56+
BR %bb.3, implicit-def $arguments
57+
; CHECK: bb.3.entry0:
58+
; CHECK: BR %bb.4, implicit-def $arguments
59+
60+
; Routing block for entry1, when predecessor is inside the loop
61+
; CHECK: bb.4:
62+
; CHECK: %1:i32 = CONST_I32 1, implicit-def $arguments
63+
; CHECK: BR %bb.6, implicit-def $arguments
64+
65+
bb.3.entry1:
66+
; predecessors: %bb.1, %bb.2
67+
successors: %bb.2
68+
BR %bb.2, implicit-def $arguments
69+
; CHECK: bb.5.entry1:
70+
; CHECK: BR %bb.8, implicit-def $arguments
71+
72+
; Dispatch block
73+
; CHECK: bb.6:
74+
; CHECK: BR_TABLE_I32 %1, %bb.3, %bb.5, %bb.5, implicit-def $arguments
75+
76+
; Routing block for entry1, when predecessor is outside the loop
77+
; CHECK: bb.7:
78+
; CHECK: %1:i32 = CONST_I32 1, implicit-def $arguments
79+
; CHECK: BR %bb.6, implicit-def $arguments
80+
81+
; Routing block for entry0, when predecessor is inside the loop
82+
; CHECK: bb.8:
83+
; CHECK: %1:i32 = CONST_I32 0, implicit-def $arguments
84+
; CHECK: BR %bb.6, implicit-def $arguments

0 commit comments

Comments
 (0)