Skip to content

Commit af6ff98

Browse files
authored
[AMDGPU] Move WWM register pre-allocation to during regalloc (#70618)
Move the SIPreAllocateWWMRegs pass to just before VGPR allocation. This avoids recomputing the virtual register map and live register matrix, at the cost of a slight regression at O0, where live intervals and slot indexes must now be computed.
1 parent 182b7e6 commit af6ff98

File tree

6 files changed

+218
-214
lines changed

6 files changed

+218
-214
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1281,7 +1281,6 @@ void GCNPassConfig::addFastRegAlloc() {
12811281
insertPass(&PHIEliminationID, &SILowerControlFlowID);
12821282

12831283
insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID);
1284-
insertPass(&TwoAddressInstructionPassID, &SIPreAllocateWWMRegsID);
12851284

12861285
TargetPassConfig::addFastRegAlloc();
12871286
}
@@ -1290,7 +1289,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
12901289
// Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation
12911290
// instructions that cause scheduling barriers.
12921291
insertPass(&MachineSchedulerID, &SIWholeQuadModeID);
1293-
insertPass(&MachineSchedulerID, &SIPreAllocateWWMRegsID);
12941292

12951293
if (OptExecMaskPreRA)
12961294
insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
@@ -1377,6 +1375,7 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
13771375

13781376
// Equivalent of PEI for SGPRs.
13791377
addPass(&SILowerSGPRSpillsID);
1378+
addPass(&SIPreAllocateWWMRegsID);
13801379

13811380
addPass(createVGPRAllocPass(false));
13821381

@@ -1400,6 +1399,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
14001399

14011400
// Equivalent of PEI for SGPRs.
14021401
addPass(&SILowerSGPRSpillsID);
1402+
addPass(&SIPreAllocateWWMRegsID);
14031403

14041404
addPass(createVGPRAllocPass(true));
14051405

llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,9 @@ class SIPreAllocateWWMRegs : public MachineFunctionPass {
5656

5757
void getAnalysisUsage(AnalysisUsage &AU) const override {
5858
AU.addRequired<LiveIntervals>();
59-
AU.addPreserved<LiveIntervals>();
6059
AU.addRequired<VirtRegMap>();
6160
AU.addRequired<LiveRegMatrix>();
62-
AU.addPreserved<SlotIndexes>();
63-
AU.setPreservesCFG();
61+
AU.setPreservesAll();
6462
MachineFunctionPass::getAnalysisUsage(AU);
6563
}
6664

@@ -101,7 +99,7 @@ bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
10199
LiveInterval &LI = LIS->getInterval(Reg);
102100

103101
for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
104-
if (!MRI->isPhysRegUsed(PhysReg) &&
102+
if (!MRI->isPhysRegUsed(PhysReg, /*SkipRegMaskTest=*/true) &&
105103
Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
106104
Matrix->assign(LI, PhysReg);
107105
assert(PhysReg != 0);
@@ -217,6 +215,9 @@ bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
217215
MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
218216
RegsAssigned |= processDef(MI.getOperand(0));
219217

218+
if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR)
219+
continue;
220+
220221
if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
221222
MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM ||
222223
MI.getOpcode() == AMDGPU::ENTER_PSEUDO_WM) {

llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
1111
; REGALLOC-NEXT: renamable $vgpr3 = IMPLICIT_DEF
1212
; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
1313
; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
14-
; REGALLOC-NEXT: renamable $vgpr1 = COPY killed $vgpr0
14+
; REGALLOC-NEXT: renamable $vgpr1 = COPY $vgpr0
1515
; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
1616
; REGALLOC-NEXT: renamable $sgpr4 = S_MOV_B32 49
1717
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = V_CMP_GT_I32_e64 killed $vgpr1, killed $sgpr4, implicit $exec

llvm/test/CodeGen/AMDGPU/llc-pipeline.ll

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -106,12 +106,14 @@
106106
; GCN-O0-NEXT: Slot index numbering
107107
; GCN-O0-NEXT: Live Interval Analysis
108108
; GCN-O0-NEXT: SI Whole Quad Mode
109-
; GCN-O0-NEXT: Virtual Register Map
110-
; GCN-O0-NEXT: Live Register Matrix
111-
; GCN-O0-NEXT: SI Pre-allocate WWM Registers
112109
; GCN-O0-NEXT: AMDGPU Pre-RA Long Branch Reg
113110
; GCN-O0-NEXT: Fast Register Allocator
114111
; GCN-O0-NEXT: SI lower SGPR spill instructions
112+
; GCN-O0-NEXT: Slot index numbering
113+
; GCN-O0-NEXT: Live Interval Analysis
114+
; GCN-O0-NEXT: Virtual Register Map
115+
; GCN-O0-NEXT: Live Register Matrix
116+
; GCN-O0-NEXT: SI Pre-allocate WWM Registers
115117
; GCN-O0-NEXT: Fast Register Allocator
116118
; GCN-O0-NEXT: SI Lower WWM Copies
117119
; GCN-O0-NEXT: SI Fix VGPR copies
@@ -334,9 +336,6 @@
334336
; GCN-O1-NEXT: Rename Disconnected Subregister Components
335337
; GCN-O1-NEXT: Machine Instruction Scheduler
336338
; GCN-O1-NEXT: SI Whole Quad Mode
337-
; GCN-O1-NEXT: Virtual Register Map
338-
; GCN-O1-NEXT: Live Register Matrix
339-
; GCN-O1-NEXT: SI Pre-allocate WWM Registers
340339
; GCN-O1-NEXT: SI optimize exec mask operations pre-RA
341340
; GCN-O1-NEXT: AMDGPU Pre-RA Long Branch Reg
342341
; GCN-O1-NEXT: Machine Natural Loop Construction
@@ -354,6 +353,7 @@
354353
; GCN-O1-NEXT: SI lower SGPR spill instructions
355354
; GCN-O1-NEXT: Virtual Register Map
356355
; GCN-O1-NEXT: Live Register Matrix
356+
; GCN-O1-NEXT: SI Pre-allocate WWM Registers
357357
; GCN-O1-NEXT: Greedy Register Allocator
358358
; GCN-O1-NEXT: SI Lower WWM Copies
359359
; GCN-O1-NEXT: GCN NSA Reassign
@@ -625,9 +625,6 @@
625625
; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA optimizations
626626
; GCN-O1-OPTS-NEXT: Machine Instruction Scheduler
627627
; GCN-O1-OPTS-NEXT: SI Whole Quad Mode
628-
; GCN-O1-OPTS-NEXT: Virtual Register Map
629-
; GCN-O1-OPTS-NEXT: Live Register Matrix
630-
; GCN-O1-OPTS-NEXT: SI Pre-allocate WWM Registers
631628
; GCN-O1-OPTS-NEXT: SI optimize exec mask operations pre-RA
632629
; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA Long Branch Reg
633630
; GCN-O1-OPTS-NEXT: Machine Natural Loop Construction
@@ -645,6 +642,7 @@
645642
; GCN-O1-OPTS-NEXT: SI lower SGPR spill instructions
646643
; GCN-O1-OPTS-NEXT: Virtual Register Map
647644
; GCN-O1-OPTS-NEXT: Live Register Matrix
645+
; GCN-O1-OPTS-NEXT: SI Pre-allocate WWM Registers
648646
; GCN-O1-OPTS-NEXT: Greedy Register Allocator
649647
; GCN-O1-OPTS-NEXT: SI Lower WWM Copies
650648
; GCN-O1-OPTS-NEXT: GCN NSA Reassign
@@ -927,9 +925,6 @@
927925
; GCN-O2-NEXT: AMDGPU Pre-RA optimizations
928926
; GCN-O2-NEXT: Machine Instruction Scheduler
929927
; GCN-O2-NEXT: SI Whole Quad Mode
930-
; GCN-O2-NEXT: Virtual Register Map
931-
; GCN-O2-NEXT: Live Register Matrix
932-
; GCN-O2-NEXT: SI Pre-allocate WWM Registers
933928
; GCN-O2-NEXT: SI optimize exec mask operations pre-RA
934929
; GCN-O2-NEXT: SI Form memory clauses
935930
; GCN-O2-NEXT: AMDGPU Pre-RA Long Branch Reg
@@ -948,6 +943,7 @@
948943
; GCN-O2-NEXT: SI lower SGPR spill instructions
949944
; GCN-O2-NEXT: Virtual Register Map
950945
; GCN-O2-NEXT: Live Register Matrix
946+
; GCN-O2-NEXT: SI Pre-allocate WWM Registers
951947
; GCN-O2-NEXT: Greedy Register Allocator
952948
; GCN-O2-NEXT: SI Lower WWM Copies
953949
; GCN-O2-NEXT: GCN NSA Reassign
@@ -1242,9 +1238,6 @@
12421238
; GCN-O3-NEXT: AMDGPU Pre-RA optimizations
12431239
; GCN-O3-NEXT: Machine Instruction Scheduler
12441240
; GCN-O3-NEXT: SI Whole Quad Mode
1245-
; GCN-O3-NEXT: Virtual Register Map
1246-
; GCN-O3-NEXT: Live Register Matrix
1247-
; GCN-O3-NEXT: SI Pre-allocate WWM Registers
12481241
; GCN-O3-NEXT: SI optimize exec mask operations pre-RA
12491242
; GCN-O3-NEXT: SI Form memory clauses
12501243
; GCN-O3-NEXT: AMDGPU Pre-RA Long Branch Reg
@@ -1263,6 +1256,7 @@
12631256
; GCN-O3-NEXT: SI lower SGPR spill instructions
12641257
; GCN-O3-NEXT: Virtual Register Map
12651258
; GCN-O3-NEXT: Live Register Matrix
1259+
; GCN-O3-NEXT: SI Pre-allocate WWM Registers
12661260
; GCN-O3-NEXT: Greedy Register Allocator
12671261
; GCN-O3-NEXT: SI Lower WWM Copies
12681262
; GCN-O3-NEXT: GCN NSA Reassign

llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
; DEFAULT-NEXT: SI lower SGPR spill instructions
2121
; DEFAULT-NEXT: Virtual Register Map
2222
; DEFAULT-NEXT: Live Register Matrix
23+
; DEFAULT-NEXT: SI Pre-allocate WWM Registers
2324
; DEFAULT-NEXT: Greedy Register Allocator
2425
; DEFAULT-NEXT: SI Lower WWM Copies
2526
; DEFAULT-NEXT: GCN NSA Reassign
@@ -28,6 +29,11 @@
2829

2930
; O0: Fast Register Allocator
3031
; O0-NEXT: SI lower SGPR spill instructions
32+
; O0-NEXT: Slot index numbering
33+
; O0-NEXT: Live Interval Analysis
34+
; O0-NEXT: Virtual Register Map
35+
; O0-NEXT: Live Register Matrix
36+
; O0-NEXT: SI Pre-allocate WWM Registers
3137
; O0-NEXT: Fast Register Allocator
3238
; O0-NEXT: SI Lower WWM Copies
3339
; O0-NEXT: SI Fix VGPR copies
@@ -46,6 +52,7 @@
4652
; BASIC-DEFAULT-NEXT: SI lower SGPR spill instructions
4753
; BASIC-DEFAULT-NEXT: Virtual Register Map
4854
; BASIC-DEFAULT-NEXT: Live Register Matrix
55+
; BASIC-DEFAULT-NEXT: SI Pre-allocate WWM Registers
4956
; BASIC-DEFAULT-NEXT: Bundle Machine CFG Edges
5057
; BASIC-DEFAULT-NEXT: Spill Code Placement Analysis
5158
; BASIC-DEFAULT-NEXT: Lazy Machine Block Frequency Analysis
@@ -63,6 +70,7 @@
6370
; DEFAULT-BASIC-NEXT: SI lower SGPR spill instructions
6471
; DEFAULT-BASIC-NEXT: Virtual Register Map
6572
; DEFAULT-BASIC-NEXT: Live Register Matrix
73+
; DEFAULT-BASIC-NEXT: SI Pre-allocate WWM Registers
6674
; DEFAULT-BASIC-NEXT: Basic Register Allocator
6775
; DEFAULT-BASIC-NEXT: SI Lower WWM Copies
6876
; DEFAULT-BASIC-NEXT: GCN NSA Reassign
@@ -82,6 +90,7 @@
8290
; BASIC-BASIC-NEXT: SI lower SGPR spill instructions
8391
; BASIC-BASIC-NEXT: Virtual Register Map
8492
; BASIC-BASIC-NEXT: Live Register Matrix
93+
; BASIC-BASIC-NEXT: SI Pre-allocate WWM Registers
8594
; BASIC-BASIC-NEXT: Basic Register Allocator
8695
; BASIC-BASIC-NEXT: SI Lower WWM Copies
8796
; BASIC-BASIC-NEXT: GCN NSA Reassign

0 commit comments

Comments
 (0)