Skip to content

Commit edab7dd

Browse files
author
Victor Huang
committed
Disable hoisting MI to hotter basic blocks
In current Hoist() function of machine licm pass, it will not check the source and destination basic block frequencies that a instruction is hoisted from/to. There is a chance that instruction is hoisted from a cold to a hot basic block. In this patch, we add options to disable machine instruction hoisting if destination block is hotter. Differential Revision: https://reviews.llvm.org/D63676
1 parent c19528f commit edab7dd

File tree

3 files changed

+489
-0
lines changed

3 files changed

+489
-0
lines changed

llvm/lib/CodeGen/MachineLICM.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "llvm/ADT/Statistic.h"
2424
#include "llvm/Analysis/AliasAnalysis.h"
2525
#include "llvm/CodeGen/MachineBasicBlock.h"
26+
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
2627
#include "llvm/CodeGen/MachineDominators.h"
2728
#include "llvm/CodeGen/MachineFrameInfo.h"
2829
#include "llvm/CodeGen/MachineFunction.h"
@@ -74,6 +75,27 @@ static cl::opt<bool>
7475
HoistConstStores("hoist-const-stores",
7576
cl::desc("Hoist invariant stores"),
7677
cl::init(true), cl::Hidden);
78+
// The default threshold of 100 (i.e. if target block is 100 times hotter)
79+
// is based on empirical data on a single target and is subject to tuning.
80+
static cl::opt<unsigned>
81+
BlockFrequencyRatioThreshold("block-freq-ratio-threshold",
82+
cl::desc("Do not hoist instructions if target"
83+
"block is N times hotter than the source."),
84+
cl::init(100), cl::Hidden);
85+
86+
enum class UseBFI { None, PGO, All };
87+
88+
static cl::opt<UseBFI>
89+
DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks",
90+
cl::desc("Disable hoisting instructions to"
91+
" hotter blocks"),
92+
cl::init(UseBFI::None), cl::Hidden,
93+
cl::values(clEnumValN(UseBFI::None, "none",
94+
"disable the feature"),
95+
clEnumValN(UseBFI::PGO, "pgo",
96+
"enable the feature when using profile data"),
97+
clEnumValN(UseBFI::All, "all",
98+
"enable the feature with/wo profile data")));
7799

78100
STATISTIC(NumHoisted,
79101
"Number of machine instructions hoisted out of loops");
@@ -87,6 +109,8 @@ STATISTIC(NumPostRAHoisted,
87109
"Number of machine instructions hoisted out of loops post regalloc");
88110
STATISTIC(NumStoreConst,
89111
"Number of stores of const phys reg hoisted out of loops");
112+
STATISTIC(NumNotHoistedDueToHotness,
113+
"Number of instructions not hoisted due to block frequency");
90114

91115
namespace {
92116

@@ -98,9 +122,11 @@ namespace {
98122
MachineRegisterInfo *MRI;
99123
TargetSchedModel SchedModel;
100124
bool PreRegAlloc;
125+
bool HasProfileData;
101126

102127
// Various analyses that we use...
103128
AliasAnalysis *AA; // Alias analysis info.
129+
MachineBlockFrequencyInfo *MBFI; // Machine block frequncy info
104130
MachineLoopInfo *MLI; // Current MachineLoopInfo
105131
MachineDominatorTree *DT; // Machine dominator tree for the cur loop
106132

@@ -150,6 +176,8 @@ namespace {
150176

151177
void getAnalysisUsage(AnalysisUsage &AU) const override {
152178
AU.addRequired<MachineLoopInfo>();
179+
if (DisableHoistingToHotterBlocks != UseBFI::None)
180+
AU.addRequired<MachineBlockFrequencyInfo>();
153181
AU.addRequired<MachineDominatorTree>();
154182
AU.addRequired<AAResultsWrapperPass>();
155183
AU.addPreserved<MachineLoopInfo>();
@@ -245,6 +273,8 @@ namespace {
245273

246274
void InitCSEMap(MachineBasicBlock *BB);
247275

276+
bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
277+
MachineBasicBlock *TgtBlock);
248278
MachineBasicBlock *getCurPreheader();
249279
};
250280

@@ -275,6 +305,7 @@ char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID;
275305
INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
276306
"Machine Loop Invariant Code Motion", false, false)
277307
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
308+
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
278309
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
279310
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
280311
INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
@@ -283,6 +314,7 @@ INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
283314
INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm",
284315
"Early Machine Loop Invariant Code Motion", false, false)
285316
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
317+
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
286318
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
287319
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
288320
INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
@@ -315,6 +347,7 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
315347
SchedModel.init(&ST);
316348

317349
PreRegAlloc = MRI->isSSA();
350+
HasProfileData = MF.getFunction().hasProfileData();
318351

319352
if (PreRegAlloc)
320353
LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
@@ -333,6 +366,8 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
333366
}
334367

335368
// Get our Loop information...
369+
if (DisableHoistingToHotterBlocks != UseBFI::None)
370+
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
336371
MLI = &getAnalysis<MachineLoopInfo>();
337372
DT = &getAnalysis<MachineDominatorTree>();
338373
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
@@ -1433,6 +1468,15 @@ bool MachineLICMBase::MayCSE(MachineInstr *MI) {
14331468
/// that are safe to hoist, this instruction is called to do the dirty work.
14341469
/// It returns true if the instruction is hoisted.
14351470
bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
1471+
MachineBasicBlock *SrcBlock = MI->getParent();
1472+
1473+
// Disable the instruction hoisting due to block hotness
1474+
if ((DisableHoistingToHotterBlocks == UseBFI::All ||
1475+
(DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) &&
1476+
isTgtHotterThanSrc(SrcBlock, Preheader)) {
1477+
++NumNotHoistedDueToHotness;
1478+
return false;
1479+
}
14361480
// First check whether we should hoist this instruction.
14371481
if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
14381482
// If not, try unfolding a hoistable load.
@@ -1526,3 +1570,21 @@ MachineBasicBlock *MachineLICMBase::getCurPreheader() {
15261570
}
15271571
return CurPreheader;
15281572
}
1573+
1574+
/// Is the target basic block at least "BlockFrequencyRatioThreshold"
1575+
/// times hotter than the source basic block.
1576+
bool MachineLICMBase::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
1577+
MachineBasicBlock *TgtBlock) {
1578+
// Parse source and target basic block frequency from MBFI
1579+
uint64_t SrcBF = MBFI->getBlockFreq(SrcBlock).getFrequency();
1580+
uint64_t DstBF = MBFI->getBlockFreq(TgtBlock).getFrequency();
1581+
1582+
// Disable the hoisting if source block frequency is zero
1583+
if (!SrcBF)
1584+
return true;
1585+
1586+
double Ratio = (double)DstBF / SrcBF;
1587+
1588+
// Compare the block frequency ratio with the threshold
1589+
return Ratio > BlockFrequencyRatioThreshold;
1590+
}
Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
# NOTE: This test verifies disable/enable instruction hoisting to hot blocks based on non-profile data
2+
# RUN: llc -run-pass early-machinelicm -disable-hoisting-to-hotter-blocks=all -block-freq-ratio-threshold=100 %s -o - | FileCheck %s --check-prefix=CHECK-NO-HOIST
3+
# RUN: llc -run-pass early-machinelicm -disable-hoisting-to-hotter-blocks=all -block-freq-ratio-threshold=100000000 %s -o - | FileCheck %s --check-prefix=CHECK-HOIST
4+
# RUN: llc -run-pass early-machinelicm -disable-hoisting-to-hotter-blocks=pgo -block-freq-ratio-threshold=100 %s -o - | FileCheck %s --check-prefix=CHECK-HOIST
5+
# RUN: llc -run-pass early-machinelicm -disable-hoisting-to-hotter-blocks=none -block-freq-ratio-threshold=100 %s -o - | FileCheck %s --check-prefix=CHECK-HOIST
6+
7+
--- |
8+
target datalayout = "e-m:e-i64:64-n32:64"
9+
10+
define dso_local void @test(void (i32)* nocapture %fp, i32 signext %Arg, i32 signext %Len, i32* nocapture %Ptr) {
11+
entry:
12+
tail call void asm sideeffect "#NOTHING", "~{r2}"()
13+
%cmp6 = icmp sgt i32 %Len, 0
14+
br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup
15+
16+
for.body.lr.ph: ; preds = %entry
17+
%cmp1 = icmp sgt i32 %Arg, 10
18+
br label %for.body
19+
20+
for.cond.cleanup: ; preds = %for.inc, %entry
21+
ret void
22+
23+
for.body: ; preds = %for.inc, %for.body.lr.ph
24+
%i.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
25+
%0 = load i32, i32* %Ptr, align 4
26+
%1 = add i32 %i.07, %0
27+
store i32 %1, i32* %Ptr, align 4
28+
br i1 %cmp1, label %if.then, label %for.inc
29+
30+
if.then: ; preds = %for.body
31+
tail call void asm sideeffect "#NOTHING", "~{r2}"()
32+
tail call void %fp(i32 signext %Arg)
33+
br label %for.inc
34+
35+
for.inc: ; preds = %if.then, %for.body
36+
%inc = add nuw nsw i32 %i.07, 1
37+
%exitcond = icmp eq i32 %Len, %inc
38+
br i1 %exitcond, label %for.cond.cleanup, label %for.body
39+
}
40+
41+
; Function Attrs: nounwind
42+
declare void @llvm.stackprotector(i8*, i8**) #0
43+
44+
attributes #0 = { nounwind }
45+
46+
...
47+
---
48+
name: test
49+
alignment: 4
50+
exposesReturnsTwice: false
51+
legalized: false
52+
regBankSelected: false
53+
selected: false
54+
failedISel: false
55+
tracksRegLiveness: true
56+
hasWinCFI: false
57+
registers:
58+
- { id: 0, class: crbitrc, preferred-register: '' }
59+
- { id: 1, class: gprc_and_gprc_nor0, preferred-register: '' }
60+
- { id: 2, class: gprc, preferred-register: '' }
61+
- { id: 3, class: g8rc, preferred-register: '' }
62+
- { id: 4, class: g8rc, preferred-register: '' }
63+
- { id: 5, class: g8rc, preferred-register: '' }
64+
- { id: 6, class: g8rc_and_g8rc_nox0, preferred-register: '' }
65+
- { id: 7, class: gprc, preferred-register: '' }
66+
- { id: 8, class: gprc, preferred-register: '' }
67+
- { id: 9, class: crrc, preferred-register: '' }
68+
- { id: 10, class: gprc, preferred-register: '' }
69+
- { id: 11, class: crrc, preferred-register: '' }
70+
- { id: 12, class: gprc, preferred-register: '' }
71+
- { id: 13, class: gprc, preferred-register: '' }
72+
- { id: 14, class: g8rc, preferred-register: '' }
73+
- { id: 15, class: g8rc, preferred-register: '' }
74+
- { id: 16, class: crrc, preferred-register: '' }
75+
liveins:
76+
- { reg: '$x3', virtual-reg: '%3' }
77+
- { reg: '$x4', virtual-reg: '%4' }
78+
- { reg: '$x5', virtual-reg: '%5' }
79+
- { reg: '$x6', virtual-reg: '%6' }
80+
frameInfo:
81+
isFrameAddressTaken: false
82+
isReturnAddressTaken: false
83+
hasStackMap: false
84+
hasPatchPoint: false
85+
stackSize: 0
86+
offsetAdjustment: 0
87+
maxAlignment: 0
88+
adjustsStack: false
89+
hasCalls: true
90+
stackProtector: ''
91+
maxCallFrameSize: 4294967295
92+
cvBytesOfCalleeSavedRegisters: 0
93+
hasOpaqueSPAdjustment: false
94+
hasVAStart: false
95+
hasMustTailInVarArgFunc: false
96+
localFrameSize: 0
97+
savePoint: ''
98+
restorePoint: ''
99+
fixedStack: []
100+
stack: []
101+
constants: []
102+
machineFunctionInfo: {}
103+
body: |
104+
bb.0.entry:
105+
successors: %bb.1(0x7ecade30), %bb.2(0x013521d0)
106+
liveins: $x3, $x4, $x5, $x6
107+
108+
%6:g8rc_and_g8rc_nox0 = COPY $x6
109+
%5:g8rc = COPY $x5
110+
%4:g8rc = COPY $x4
111+
%3:g8rc = COPY $x3
112+
%7:gprc = COPY %4.sub_32
113+
%8:gprc = COPY %5.sub_32
114+
INLINEASM &"#NOTHING", 1, 12, implicit-def early-clobber $r2
115+
%9:crrc = CMPWI %8, 1
116+
BCC 12, killed %9, %bb.2
117+
B %bb.1
118+
119+
bb.1.for.body.lr.ph:
120+
successors: %bb.3(0x80000000)
121+
122+
%11:crrc = CMPWI %7, 10
123+
%0:crbitrc = COPY %11.sub_gt
124+
%10:gprc = LI 0
125+
B %bb.3
126+
127+
bb.2.for.cond.cleanup:
128+
BLR8 implicit $lr8, implicit $rm
129+
130+
bb.3.for.body:
131+
successors: %bb.4(0x00000002), %bb.5(0x7ffffffe)
132+
133+
%1:gprc_and_gprc_nor0 = PHI %10, %bb.1, %2, %bb.5
134+
%12:gprc = LWZ 0, %6 :: (load 4 from %ir.Ptr)
135+
%13:gprc = ADD4 %1, killed %12
136+
STW killed %13, 0, %6 :: (store 4 into %ir.Ptr)
137+
BCn %0, %bb.5
138+
B %bb.4
139+
140+
bb.4.if.then:
141+
successors: %bb.5(0x80000000)
142+
143+
INLINEASM &"#NOTHING", 1, 12, implicit-def early-clobber $r2
144+
ADJCALLSTACKDOWN 32, 0, implicit-def dead $r1, implicit $r1
145+
%14:g8rc = COPY $x2
146+
STD %14, 24, $x1 :: (store 8 into stack + 24)
147+
%15:g8rc = EXTSW_32_64 %7
148+
$x3 = COPY %15
149+
$x12 = COPY %3
150+
MTCTR8 %3, implicit-def $ctr8
151+
BCTRL8_LDinto_toc 24, $x1, csr_svr464_altivec, implicit-def dead $lr8, implicit-def dead $x2, implicit $ctr8, implicit $rm, implicit $x3, implicit $x12, implicit $x2, implicit-def $r1
152+
ADJCALLSTACKUP 32, 0, implicit-def dead $r1, implicit $r1
153+
154+
bb.5.for.inc:
155+
successors: %bb.2(0x013521d0), %bb.3(0x7ecade30)
156+
157+
%2:gprc = nuw nsw ADDI %1, 1
158+
%16:crrc = CMPLW %8, %2
159+
BCC 76, killed %16, %bb.2
160+
B %bb.3
161+
162+
...
163+
164+
# CHECK for enabling instruction hoisting
165+
#CHECK-LABEL: test
166+
#CHECK-HOIST: bb.1.for.body.lr.ph:
167+
#CHECK-HOIST: %14:g8rc = COPY $x2
168+
#CHECK-HOIST: STD %14, 24, $x1 :: (store 8 into stack + 24)
169+
#CHECK-HOIST: %15:g8rc = EXTSW_32_64 %7
170+
#CHECK-HOIST: B %bb.3
171+
172+
#CHECK-HOIST: bb.4.if.then:
173+
#CHECK-HOIST-NOT: %14:g8rc = COPY $x2
174+
#CHECK-HOIST-NOT: STD %14, 24, $x1 :: (store 8 into stack + 24)
175+
#CHECK-HOIST-NOT: %15:g8rc = EXTSW_32_64 %7
176+
#CHECK-HOIST: bb.5.for.inc:
177+
178+
# CHECK for disabling instruction hoisting due to block hotness
179+
#CHECK-LABEL: test
180+
#CHECK-NO-HOIST: bb.1.for.body.lr.ph:
181+
#CHECK-NO-HOIST-NOT: %14:g8rc = COPY $x2
182+
#CHECK-NO-HOIST-NOT: STD %14, 24, $x1 :: (store 8 into stack + 24)
183+
#CHECK-NO-HOIST-NOT: %15:g8rc = EXTSW_32_64 %7
184+
#CHECK-NO-HOIST: B %bb.3
185+
186+
#CHECK-NO-HOIST: bb.4.if.then:
187+
#CHECK-NO-HOIST: %14:g8rc = COPY $x2
188+
#CHECK-NO-HOIST: STD %14, 24, $x1 :: (store 8 into stack + 24)
189+
#CHECK-NO-HOIST: %15:g8rc = EXTSW_32_64 %7
190+
#CHECK-NO-HOIST: bb.5.for.inc:

0 commit comments

Comments
 (0)