Skip to content

Commit 57c33ac

Browse files
authored
[MachineSink] Sink into consistent blocks for optsize funcs (#115367)
Do not consider profile data when choosing a successor block to sink into for optsize functions. This should result in more consistent instruction sequences which will improve outlining and ICF. We've observed a slight code size improvement in a large binary. This is similar reasoning to #114607. Using profile data to select a block to sink into was originally added in d04f759.
1 parent b8d6659 commit 57c33ac

File tree

2 files changed

+34
-11
lines changed

2 files changed

+34
-11
lines changed

llvm/lib/CodeGen/MachineSink.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "llvm/ADT/Statistic.h"
2727
#include "llvm/Analysis/AliasAnalysis.h"
2828
#include "llvm/Analysis/CFG.h"
29+
#include "llvm/Analysis/ProfileSummaryInfo.h"
2930
#include "llvm/CodeGen/MachineBasicBlock.h"
3031
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
3132
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -38,6 +39,7 @@
3839
#include "llvm/CodeGen/MachineOperand.h"
3940
#include "llvm/CodeGen/MachinePostDominators.h"
4041
#include "llvm/CodeGen/MachineRegisterInfo.h"
42+
#include "llvm/CodeGen/MachineSizeOpts.h"
4143
#include "llvm/CodeGen/RegisterClassInfo.h"
4244
#include "llvm/CodeGen/RegisterPressure.h"
4345
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -122,6 +124,7 @@ namespace {
122124
MachineDominatorTree *DT = nullptr; // Machine dominator tree
123125
MachinePostDominatorTree *PDT = nullptr; // Machine post dominator tree
124126
MachineCycleInfo *CI = nullptr;
127+
ProfileSummaryInfo *PSI = nullptr;
125128
MachineBlockFrequencyInfo *MBFI = nullptr;
126129
const MachineBranchProbabilityInfo *MBPI = nullptr;
127130
AliasAnalysis *AA = nullptr;
@@ -198,6 +201,7 @@ namespace {
198201
AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
199202
AU.addPreserved<MachineCycleInfoWrapperPass>();
200203
AU.addPreserved<MachineLoopInfoWrapperPass>();
204+
AU.addRequired<ProfileSummaryInfoWrapperPass>();
201205
if (UseBlockFreqInfo)
202206
AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
203207
AU.addRequired<TargetPassConfig>();
@@ -284,6 +288,7 @@ char &llvm::MachineSinkingID = MachineSinking::ID;
284288

285289
INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
286290
"Machine code sinking", false, false)
291+
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
287292
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
288293
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
289294
INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
@@ -722,6 +727,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
722727
DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
723728
PDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
724729
CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
730+
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
725731
MBFI = UseBlockFreqInfo
726732
? &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()
727733
: nullptr;
@@ -1217,12 +1223,12 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
12171223

12181224
// Sort Successors according to their cycle depth or block frequency info.
12191225
llvm::stable_sort(
1220-
AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
1226+
AllSuccs, [&](const MachineBasicBlock *L, const MachineBasicBlock *R) {
12211227
uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
12221228
uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
1223-
bool HasBlockFreq = LHSFreq != 0 || RHSFreq != 0;
1224-
return HasBlockFreq ? LHSFreq < RHSFreq
1225-
: CI->getCycleDepth(L) < CI->getCycleDepth(R);
1229+
// Order by cycle depth for optsize blocks, or when neither block has a frequency.
if (llvm::shouldOptimizeForSize(MBB, PSI, MBFI) || (!LHSFreq && !RHSFreq))
1230+
return CI->getCycleDepth(L) < CI->getCycleDepth(R);
1231+
return LHSFreq < RHSFreq;
12261232
});
12271233

12281234
auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));

llvm/test/CodeGen/X86/sink-blockfreq.ll

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI
22
; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
3+
; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -force-pgso -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
34

45
; Test that by changing BlockFrequencyInfo we change the order in which
56
; machine-sink looks for successor blocks. By not using BFI, both G and B
67
; have the same loop depth and no instruction is sunk - B is selected but
78
; can't be used, so as to avoid breaking a non-profitable critical edge. By using
89
; BFI, "mul" is sunk into the less frequent block G.
9-
define i32 @sink_freqinfo(i32 %a, i32 %b) nounwind uwtable ssp {
10+
define i32 @sink_freqinfo(i32 %a, i32 %b) nounwind uwtable ssp !prof !14 {
1011
; MSINK_BFI-LABEL: sink_freqinfo
1112
; MSINK_BFI: jl
1213
; MSINK_BFI-NEXT: ## %bb.
@@ -22,24 +23,40 @@ B:
2223
%ee = phi i32 [ 0, %entry ], [ %inc, %F ]
2324
%xx = sub i32 %a, %ee
2425
%cond0 = icmp slt i32 %xx, 0
25-
br i1 %cond0, label %F, label %exit, !prof !0
26+
br i1 %cond0, label %F, label %exit, !prof !15
2627

2728
F:
2829
%inc = add nsw i32 %xx, 2
2930
%aa = mul nsw i32 %b, %inc
3031
%exitcond = icmp slt i32 %inc, %a
31-
br i1 %exitcond, label %B, label %G, !prof !1
32+
br i1 %exitcond, label %B, label %G, !prof !16
3233

3334
G:
3435
%ii = add nsw i32 %aa, %a
3536
%ll = add i32 %b, 45
3637
%exitcond2 = icmp sge i32 %ii, %b
37-
br i1 %exitcond2, label %G, label %exit, !prof !2
38+
br i1 %exitcond2, label %G, label %exit, !prof !17
3839

3940
exit:
4041
ret i32 0
4142
}
4243

43-
!0 = !{!"branch_weights", i32 4, i32 1}
44-
!1 = !{!"branch_weights", i32 128, i32 1}
45-
!2 = !{!"branch_weights", i32 1, i32 1}
44+
!llvm.module.flags = !{!0}
45+
!0 = !{i32 1, !"ProfileSummary", !1}
46+
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
47+
!2 = !{!"ProfileFormat", !"InstrProf"}
48+
!3 = !{!"TotalCount", i64 10000}
49+
!4 = !{!"MaxCount", i64 10}
50+
!5 = !{!"MaxInternalCount", i64 1}
51+
!6 = !{!"MaxFunctionCount", i64 1000}
52+
!7 = !{!"NumCounts", i64 3}
53+
!8 = !{!"NumFunctions", i64 3}
54+
!9 = !{!"DetailedSummary", !10}
55+
!10 = !{!11, !12, !13}
56+
!11 = !{i32 10000, i64 100, i32 1}
57+
!12 = !{i32 999000, i64 100, i32 1}
58+
!13 = !{i32 999999, i64 1, i32 2}
59+
!14 = !{!"function_entry_count", i64 1000}
60+
!15 = !{!"branch_weights", i32 4, i32 1}
61+
!16 = !{!"branch_weights", i32 128, i32 1}
62+
!17 = !{!"branch_weights", i32 1, i32 1}

0 commit comments

Comments
 (0)