Skip to content

[MachineSink] Sink into consistent blocks for optsize funcs #115367

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions llvm/lib/CodeGen/MachineSink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
Expand All @@ -38,6 +39,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
Expand Down Expand Up @@ -122,6 +124,7 @@ namespace {
MachineDominatorTree *DT = nullptr; // Machine dominator tree
MachinePostDominatorTree *PDT = nullptr; // Machine post dominator tree
MachineCycleInfo *CI = nullptr;
ProfileSummaryInfo *PSI = nullptr;
MachineBlockFrequencyInfo *MBFI = nullptr;
const MachineBranchProbabilityInfo *MBPI = nullptr;
AliasAnalysis *AA = nullptr;
Expand Down Expand Up @@ -198,6 +201,7 @@ namespace {
AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
AU.addPreserved<MachineCycleInfoWrapperPass>();
AU.addPreserved<MachineLoopInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
if (UseBlockFreqInfo)
AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
Expand Down Expand Up @@ -284,6 +288,7 @@ char &llvm::MachineSinkingID = MachineSinking::ID;

INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
"Machine code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
Expand Down Expand Up @@ -722,6 +727,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
PDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
MBFI = UseBlockFreqInfo
? &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()
: nullptr;
Expand Down Expand Up @@ -1217,12 +1223,12 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,

// Sort Successors according to their cycle depth or block frequency info.
llvm::stable_sort(
AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
AllSuccs, [&](const MachineBasicBlock *L, const MachineBasicBlock *R) {
uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
bool HasBlockFreq = LHSFreq != 0 || RHSFreq != 0;
return HasBlockFreq ? LHSFreq < RHSFreq
: CI->getCycleDepth(L) < CI->getCycleDepth(R);
if (llvm::shouldOptimizeForSize(MBB, PSI, MBFI) || !LHSFreq || !RHSFreq)
return CI->getCycleDepth(L) < CI->getCycleDepth(R);
return LHSFreq < RHSFreq;
});

auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));
Expand Down
31 changes: 24 additions & 7 deletions llvm/test/CodeGen/X86/sink-blockfreq.ll
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI
; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
; RUN: llc -disable-preheader-prot=true -disable-machine-licm -machine-sink-bfi=true -force-pgso -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI

; Test that by changing BlockFrequencyInfo we change the order in which
; machine-sink looks for successor blocks. By not using BFI, both G and B
; have the same loop depth and no instructions is sinked - B is selected but
; can't be used as to avoid breaking a non profitable critical edge. By using
; BFI, "mul" is sinked into the less frequent block G.
define i32 @sink_freqinfo(i32 %a, i32 %b) nounwind uwtable ssp {
define i32 @sink_freqinfo(i32 %a, i32 %b) nounwind uwtable ssp !prof !14 {
; MSINK_BFI-LABEL: sink_freqinfo
; MSINK_BFI: jl
; MSINK_BFI-NEXT: ## %bb.
Expand All @@ -22,24 +23,40 @@ B:
%ee = phi i32 [ 0, %entry ], [ %inc, %F ]
%xx = sub i32 %a, %ee
%cond0 = icmp slt i32 %xx, 0
br i1 %cond0, label %F, label %exit, !prof !0
br i1 %cond0, label %F, label %exit, !prof !15

F:
%inc = add nsw i32 %xx, 2
%aa = mul nsw i32 %b, %inc
%exitcond = icmp slt i32 %inc, %a
br i1 %exitcond, label %B, label %G, !prof !1
br i1 %exitcond, label %B, label %G, !prof !16

G:
%ii = add nsw i32 %aa, %a
%ll = add i32 %b, 45
%exitcond2 = icmp sge i32 %ii, %b
br i1 %exitcond2, label %G, label %exit, !prof !2
br i1 %exitcond2, label %G, label %exit, !prof !17

exit:
ret i32 0
}

!0 = !{!"branch_weights", i32 4, i32 1}
!1 = !{!"branch_weights", i32 128, i32 1}
!2 = !{!"branch_weights", i32 1, i32 1}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 1000}
!15 = !{!"branch_weights", i32 4, i32 1}
!16 = !{!"branch_weights", i32 128, i32 1}
!17 = !{!"branch_weights", i32 1, i32 1}
Loading