Skip to content

Commit d04f759

Browse files
committed
[MachineSink+PGO] Teach MachineSink to use BlockFrequencyInfo
Machine Sink uses loop depth information to select between successor BBs to sink machine instructions into, where BBs within smaller loop depths are preferable. This patch adds support for choosing between successors by using profile information from BlockFrequencyInfo instead, whenever the information is available. Tested it under SPEC2006 train (average of 30 runs for each program); ~1.5% execution speedup on average on x86-64 darwin. <rdar://problem/18021659> llvm-svn: 218472
1 parent eac48b6 commit d04f759

File tree

2 files changed

+68
-6
lines changed

2 files changed

+68
-6
lines changed

llvm/lib/CodeGen/MachineSink.cpp

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/ADT/SmallSet.h"
2222
#include "llvm/ADT/Statistic.h"
2323
#include "llvm/Analysis/AliasAnalysis.h"
24+
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
2425
#include "llvm/CodeGen/MachineDominators.h"
2526
#include "llvm/CodeGen/MachineLoopInfo.h"
2627
#include "llvm/CodeGen/MachinePostDominators.h"
@@ -41,6 +42,12 @@ SplitEdges("machine-sink-split",
4142
cl::desc("Split critical edges during machine sinking"),
4243
cl::init(true), cl::Hidden);
4344

45+
static cl::opt<bool>
46+
UseBlockFreqInfo("machine-sink-bfi",
47+
cl::desc("Use block frequency info to find successors to sink"),
48+
cl::init(true), cl::Hidden);
49+
50+
4451
STATISTIC(NumSunk, "Number of machine instructions sunk");
4552
STATISTIC(NumSplit, "Number of critical edges split");
4653
STATISTIC(NumCoalesces, "Number of copies coalesced");
@@ -53,6 +60,7 @@ namespace {
5360
MachineDominatorTree *DT; // Machine dominator tree
5461
MachinePostDominatorTree *PDT; // Machine post dominator tree
5562
MachineLoopInfo *LI;
63+
const MachineBlockFrequencyInfo *MBFI;
5664
AliasAnalysis *AA;
5765

5866
// Remember which edges have been considered for breaking.
@@ -81,6 +89,8 @@ namespace {
8189
AU.addPreserved<MachineDominatorTree>();
8290
AU.addPreserved<MachinePostDominatorTree>();
8391
AU.addPreserved<MachineLoopInfo>();
92+
if (UseBlockFreqInfo)
93+
AU.addRequired<MachineBlockFrequencyInfo>();
8494
}
8595

8696
void releaseMemory() override {
@@ -247,6 +257,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
247257
DT = &getAnalysis<MachineDominatorTree>();
248258
PDT = &getAnalysis<MachinePostDominatorTree>();
249259
LI = &getAnalysis<MachineLoopInfo>();
260+
MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
250261
AA = &getAnalysis<AliasAnalysis>();
251262

252263
bool EverMadeChange = false;
@@ -566,14 +577,20 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
566577
}
567578

568579
// Otherwise, we should look at all the successors and decide which one
569-
// we should sink to.
570-
// We give successors with smaller loop depth higher priority.
571-
SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end());
572-
// Sort Successors according to their loop depth.
580+
// we should sink to. If we have reliable block frequency information
581+
// (frequency != 0) available, give successors with smaller frequencies
582+
// higher priority, otherwise prioritize smaller loop depths.
583+
SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(),
584+
MBB->succ_end());
585+
// Sort Successors according to their loop depth or block frequency info.
573586
std::stable_sort(
574587
Succs.begin(), Succs.end(),
575-
[this](const MachineBasicBlock *LHS, const MachineBasicBlock *RHS) {
576-
return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS);
588+
[this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
589+
uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
590+
uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
591+
bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
592+
return HasBlockFreq ? LHSFreq < RHSFreq
593+
: LI->getLoopDepth(L) < LI->getLoopDepth(R);
577594
});
578595
for (SmallVectorImpl<MachineBasicBlock *>::iterator SI = Succs.begin(),
579596
E = Succs.end(); SI != E; ++SI) {
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; RUN: llc -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI
2+
; RUN: llc -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
3+
4+
; Test that by changing BlockFrequencyInfo we change the order in which
5+
; machine-sink looks for successor blocks. By not using BFI, both G and B
6+
; have the same loop depth and no instruction is sunk - B is selected but
7+
; can't be used, so as to avoid breaking a non-profitable critical edge. By using
8+
; BFI, "mul" is sunk into the less frequent block G.
9+
define i32 @sink_freqinfo(i32 %a, i32 %b) nounwind uwtable ssp {
10+
; MSINK_BFI-LABEL: sink_freqinfo
11+
; MSINK_BFI: jl
12+
; MSINK_BFI-NEXT: ## BB#
13+
; MSINK_BFI-NEXT: imull
14+
15+
; MSINK_NOBFI-LABEL: sink_freqinfo
16+
; MSINK_NOBFI: imull
17+
; MSINK_NOBFI: jl
18+
entry:
19+
br label %B
20+
21+
B:
22+
%ee = phi i32 [ 0, %entry ], [ %inc, %F ]
23+
%xx = sub i32 %a, %ee
24+
%cond0 = icmp slt i32 %xx, 0
25+
br i1 %cond0, label %F, label %exit, !prof !0
26+
27+
F:
28+
%inc = add nsw i32 %xx, 2
29+
%aa = mul nsw i32 %b, %inc
30+
%exitcond = icmp slt i32 %inc, %a
31+
br i1 %exitcond, label %B, label %G, !prof !1
32+
33+
G:
34+
%ii = add nsw i32 %aa, %a
35+
%ll = add i32 %b, 45
36+
%exitcond2 = icmp sge i32 %ii, %b
37+
br i1 %exitcond2, label %G, label %exit, !prof !2
38+
39+
exit:
40+
ret i32 0
41+
}
42+
43+
!0 = metadata !{metadata !"branch_weights", i32 4, i32 1}
44+
!1 = metadata !{metadata !"branch_weights", i32 128, i32 1}
45+
!2 = metadata !{metadata !"branch_weights", i32 1, i32 1}

0 commit comments

Comments
 (0)