Skip to content

[regalloc][basic] Change spill weight for optsize funcs #112960

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions llvm/include/llvm/CodeGen/CalcSpillWeights.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class LiveIntervals;
class MachineBlockFrequencyInfo;
class MachineFunction;
class MachineLoopInfo;
class ProfileSummaryInfo;
class VirtRegMap;

/// Normalize the spill weight of a live interval
Expand Down Expand Up @@ -47,6 +48,7 @@ class VirtRegMap;
LiveIntervals &LIS;
const VirtRegMap &VRM;
const MachineLoopInfo &Loops;
ProfileSummaryInfo *PSI;
const MachineBlockFrequencyInfo &MBFI;

/// Returns true if Reg of live interval LI is used in instruction with many
Expand All @@ -56,8 +58,9 @@ class VirtRegMap;
public:
VirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS,
const VirtRegMap &VRM, const MachineLoopInfo &Loops,
const MachineBlockFrequencyInfo &MBFI)
: MF(MF), LIS(LIS), VRM(VRM), Loops(Loops), MBFI(MBFI) {}
const MachineBlockFrequencyInfo &MBFI,
ProfileSummaryInfo *PSI = nullptr)
: MF(MF), LIS(LIS), VRM(VRM), Loops(Loops), PSI(PSI), MBFI(MBFI) {}

virtual ~VirtRegAuxInfo() = default;

Expand Down
9 changes: 7 additions & 2 deletions llvm/include/llvm/CodeGen/LiveIntervals.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class MachineDominatorTree;
class MachineFunction;
class MachineInstr;
class MachineRegisterInfo;
class ProfileSummaryInfo;
class raw_ostream;
class TargetInstrInfo;
class VirtRegMap;
Expand Down Expand Up @@ -113,14 +114,18 @@ class LiveIntervals {
~LiveIntervals();

/// Calculate the spill weight to assign to a single instruction.
/// If \p PSI is provided and the function is optimized for size, the block
/// frequency is ignored so the weight reflects only the code size impact.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand that this may be confusing, but I thought it was less disruptive than adding a separate ConsiderOptSize parameter.

static float getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
const MachineInstr &MI);
const MachineInstr &MI,
ProfileSummaryInfo *PSI = nullptr);

/// Calculate the spill weight to assign to a single instruction.
/// If \p PSI is provided and the function is optimized for size, the block
/// frequency is ignored so the weight reflects only the code size impact.
static float getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
const MachineBasicBlock *MBB);
const MachineBasicBlock *MBB,
ProfileSummaryInfo *PSI = nullptr);

LiveInterval &getInterval(Register Reg) {
if (hasInterval(Reg))
Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/CodeGen/CalcSpillWeights.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,10 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
// localLI = COPY other
// ...
// other = COPY localLI
TotalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, LocalMBB);
TotalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, LocalMBB);
TotalWeight +=
LiveIntervals::getSpillWeight(true, false, &MBFI, LocalMBB, PSI);
TotalWeight +=
LiveIntervals::getSpillWeight(false, true, &MBFI, LocalMBB, PSI);

NumInstr += 2;
}
Expand Down Expand Up @@ -272,7 +274,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
// Calculate instr weight.
bool Reads, Writes;
std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg());
Weight = LiveIntervals::getSpillWeight(Writes, Reads, &MBFI, *MI);
Weight = LiveIntervals::getSpillWeight(Writes, Reads, &MBFI, *MI, PSI);

// Give extra weight to what looks like a loop induction variable update.
if (Writes && IsExiting && LIS.isLiveOutOfMBB(LI, MBB))
Expand Down
19 changes: 15 additions & 4 deletions llvm/lib/CodeGen/LiveIntervals.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,15 @@
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
Expand Down Expand Up @@ -875,14 +877,23 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {

float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
const MachineInstr &MI) {
return getSpillWeight(isDef, isUse, MBFI, MI.getParent());
const MachineInstr &MI,
ProfileSummaryInfo *PSI) {
return getSpillWeight(isDef, isUse, MBFI, MI.getParent(), PSI);
}

float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
const MachineBasicBlock *MBB) {
return (isDef + isUse) * MBFI->getBlockFreqRelativeToEntryBlock(MBB);
const MachineBasicBlock *MBB,
ProfileSummaryInfo *PSI) {
float Weight = isDef + isUse;
const auto *MF = MBB->getParent();
// When optimizing for size we only consider the codesize impact of spilling
// the register, not the runtime impact.
if (PSI && (MF->getFunction().hasOptSize() ||
llvm::shouldOptimizeForSize(MF, PSI, MBFI)))
return Weight;
return Weight * MBFI->getBlockFreqRelativeToEntryBlock(MBB);
}

LiveRange::Segment
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/CodeGen/RegAllocBasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "AllocationOrder.h"
#include "RegAllocBase.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
Expand Down Expand Up @@ -140,6 +141,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false,
false)

Expand Down Expand Up @@ -182,6 +184,7 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveDebugVariables>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
AU.addRequiredID(MachineDominatorsID);
Expand Down Expand Up @@ -312,7 +315,8 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
getAnalysis<LiveRegMatrix>());
VirtRegAuxInfo VRAI(
*MF, *LIS, *VRM, getAnalysis<MachineLoopInfoWrapperPass>().getLI(),
getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(),
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
VRAI.calculateSpillWeightsAndHints();

SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, VRAI));
Expand Down
168 changes: 168 additions & 0 deletions llvm/test/CodeGen/AArch64/regalloc-spill-weight-basic.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py

; RUN: llc < %s -mtriple=aarch64 -regalloc=basic | FileCheck %s

; Test that the basic register allocator assigns registers differently for
; minsize functions than for otherwise identical functions without the attribute.

declare void @foo(i32, ptr)

define void @optsize(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %arg5, i1 %arg6) minsize {
; CHECK-LABEL: optsize:
; CHECK: // %bb.0: // %bb
; CHECK-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w30, -48
; CHECK-NEXT: mov w23, w5
; CHECK-NEXT: mov x22, x4
; CHECK-NEXT: mov x21, x3
; CHECK-NEXT: mov x20, x2
; CHECK-NEXT: mov w19, w1
Comment on lines +22 to +26
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's interesting that the minsize spill weight seems to produce more regular assembly. This might improve outlining, which would explain the size win.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact, I'd consider using the basic register allocator for size optimization in general when the target enables the machine outliner by default.

; CHECK-NEXT: .LBB0_1: // %bb8
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cbz w19, .LBB0_1
; CHECK-NEXT: // %bb.2: // %bb8
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: cmp w19, #39
; CHECK-NEXT: b.eq .LBB0_6
; CHECK-NEXT: // %bb.3: // %bb8
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: cmp w19, #34
; CHECK-NEXT: b.eq .LBB0_6
; CHECK-NEXT: // %bb.4: // %bb8
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: cmp w19, #10
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.5: // %bb9
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: str wzr, [x20]
; CHECK-NEXT: b .LBB0_1
; CHECK-NEXT: .LBB0_6: // %bb10
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: mov w0, w23
; CHECK-NEXT: mov x1, x21
; CHECK-NEXT: str wzr, [x22]
; CHECK-NEXT: bl foo
; CHECK-NEXT: b .LBB0_1
bb:
br label %bb7

bb7: ; preds = %bb13, %bb
%phi = phi i32 [ 0, %bb ], [ %spec.select, %bb13 ]
br label %bb8

bb8: ; preds = %bb10, %bb9, %bb8, %bb7
switch i32 %arg1, label %bb8 [
i32 10, label %bb9
i32 1, label %bb16
i32 0, label %bb13
i32 39, label %bb10
i32 34, label %bb10
]

bb9: ; preds = %bb8
store i32 0, ptr %arg2, align 4
br label %bb8

bb10: ; preds = %bb8, %bb8
store i32 0, ptr %arg4, align 4
tail call void @foo(i32 %arg5, ptr %arg3)
br label %bb8

bb13: ; preds = %bb8
%not.arg6 = xor i1 %arg6, true
%spec.select = zext i1 %not.arg6 to i32
br label %bb7

bb16: ; preds = %bb8
unreachable
}

; Baseline for @optsize: the same IR body, but the function carries no minsize
; attribute. The CHECK lines below pin the register assignment and block layout
; produced for it under -regalloc=basic.
define void @optspeed(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %arg5, i1 %arg6) {
; CHECK-LABEL: optspeed:
; CHECK: // %bb.0: // %bb
; CHECK-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w30, -48
; CHECK-NEXT: mov w22, w5
; CHECK-NEXT: mov x21, x4
; CHECK-NEXT: mov x20, x3
; CHECK-NEXT: mov x23, x2
; CHECK-NEXT: mov w19, w1
; CHECK-NEXT: b .LBB1_2
; CHECK-NEXT: .LBB1_1: // %bb10
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: mov w0, w22
; CHECK-NEXT: mov x1, x20
; CHECK-NEXT: str wzr, [x21]
; CHECK-NEXT: bl foo
; CHECK-NEXT: .LBB1_2: // %bb8
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmp w19, #33
; CHECK-NEXT: b.gt .LBB1_6
; CHECK-NEXT: // %bb.3: // %bb8
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: cbz w19, .LBB1_2
; CHECK-NEXT: // %bb.4: // %bb8
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: cmp w19, #10
; CHECK-NEXT: b.ne .LBB1_2
; CHECK-NEXT: // %bb.5: // %bb9
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: str wzr, [x23]
; CHECK-NEXT: b .LBB1_2
; CHECK-NEXT: .LBB1_6: // %bb8
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: cmp w19, #34
; CHECK-NEXT: b.eq .LBB1_1
; CHECK-NEXT: // %bb.7: // %bb8
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: cmp w19, #39
; CHECK-NEXT: b.eq .LBB1_1
; CHECK-NEXT: b .LBB1_2
bb:
br label %bb7

bb7: ; preds = %bb13, %bb
%phi = phi i32 [ 0, %bb ], [ %spec.select, %bb13 ]
br label %bb8

; Dispatch on %arg1; values not listed in the switch branch back to %bb8.
bb8: ; preds = %bb10, %bb9, %bb8, %bb7
switch i32 %arg1, label %bb8 [
i32 10, label %bb9
i32 1, label %bb16
i32 0, label %bb13
i32 39, label %bb10
i32 34, label %bb10
]

bb9: ; preds = %bb8
store i32 0, ptr %arg2, align 4
br label %bb8

; The only block that calls @foo; it also stores through %arg4 before the call.
bb10: ; preds = %bb8, %bb8
store i32 0, ptr %arg4, align 4
tail call void @foo(i32 %arg5, ptr %arg3)
br label %bb8

bb13: ; preds = %bb8
%not.arg6 = xor i1 %arg6, true
%spec.select = zext i1 %not.arg6 to i32
br label %bb7

bb16: ; preds = %bb8
unreachable
}
Loading