Skip to content

Commit 04174a2

Browse files
committed
[AMDGPU] Move WWM register pre-allocation to during regalloc
Move the SIPreAllocateWWMRegs pass to just before VGPR allocation. This avoids recomputing the virtual register map and live register matrix, at the cost of a slight regression at O0, where live intervals and slot indexes must now be computed for the pass.
1 parent b3523d7 commit 04174a2

11 files changed

+2563
-2655
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1277,7 +1277,6 @@ void GCNPassConfig::addFastRegAlloc() {
12771277
insertPass(&PHIEliminationID, &SILowerControlFlowID);
12781278

12791279
insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID);
1280-
insertPass(&TwoAddressInstructionPassID, &SIPreAllocateWWMRegsID);
12811280

12821281
TargetPassConfig::addFastRegAlloc();
12831282
}
@@ -1286,7 +1285,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
12861285
// Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation
12871286
// instructions that cause scheduling barriers.
12881287
insertPass(&MachineSchedulerID, &SIWholeQuadModeID);
1289-
insertPass(&MachineSchedulerID, &SIPreAllocateWWMRegsID);
12901288

12911289
if (OptExecMaskPreRA)
12921290
insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
@@ -1373,6 +1371,7 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
13731371

13741372
// Equivalent of PEI for SGPRs.
13751373
addPass(&SILowerSGPRSpillsID);
1374+
addPass(&SIPreAllocateWWMRegsID);
13761375

13771376
addPass(createVGPRAllocPass(false));
13781377

@@ -1396,6 +1395,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
13961395

13971396
// Equivalent of PEI for SGPRs.
13981397
addPass(&SILowerSGPRSpillsID);
1398+
addPass(&SIPreAllocateWWMRegsID);
13991399

14001400
addPass(createVGPRAllocPass(true));
14011401

llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,9 @@ class SIPreAllocateWWMRegs : public MachineFunctionPass {
5656

5757
void getAnalysisUsage(AnalysisUsage &AU) const override {
5858
AU.addRequired<LiveIntervals>();
59-
AU.addPreserved<LiveIntervals>();
6059
AU.addRequired<VirtRegMap>();
6160
AU.addRequired<LiveRegMatrix>();
62-
AU.addPreserved<SlotIndexes>();
63-
AU.setPreservesCFG();
61+
AU.setPreservesAll();
6462
MachineFunctionPass::getAnalysisUsage(AU);
6563
}
6664

llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
1111
; REGALLOC-NEXT: renamable $vgpr3 = IMPLICIT_DEF
1212
; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
1313
; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
14-
; REGALLOC-NEXT: renamable $vgpr1 = COPY killed $vgpr0
14+
; REGALLOC-NEXT: renamable $vgpr1 = COPY $vgpr0
1515
; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
1616
; REGALLOC-NEXT: renamable $sgpr4 = S_MOV_B32 49
1717
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = V_CMP_GT_I32_e64 killed $vgpr1, killed $sgpr4, implicit $exec

llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll

Lines changed: 576 additions & 576 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll

Lines changed: 462 additions & 465 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll

Lines changed: 462 additions & 465 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll

Lines changed: 600 additions & 600 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/llc-pipeline.ll

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -106,12 +106,14 @@
106106
; GCN-O0-NEXT: Slot index numbering
107107
; GCN-O0-NEXT: Live Interval Analysis
108108
; GCN-O0-NEXT: SI Whole Quad Mode
109-
; GCN-O0-NEXT: Virtual Register Map
110-
; GCN-O0-NEXT: Live Register Matrix
111-
; GCN-O0-NEXT: SI Pre-allocate WWM Registers
112109
; GCN-O0-NEXT: AMDGPU Pre-RA Long Branch Reg
113110
; GCN-O0-NEXT: Fast Register Allocator
114111
; GCN-O0-NEXT: SI lower SGPR spill instructions
112+
; GCN-O0-NEXT: Slot index numbering
113+
; GCN-O0-NEXT: Live Interval Analysis
114+
; GCN-O0-NEXT: Virtual Register Map
115+
; GCN-O0-NEXT: Live Register Matrix
116+
; GCN-O0-NEXT: SI Pre-allocate WWM Registers
115117
; GCN-O0-NEXT: Fast Register Allocator
116118
; GCN-O0-NEXT: SI Lower WWM Copies
117119
; GCN-O0-NEXT: SI Fix VGPR copies
@@ -334,9 +336,6 @@
334336
; GCN-O1-NEXT: Rename Disconnected Subregister Components
335337
; GCN-O1-NEXT: Machine Instruction Scheduler
336338
; GCN-O1-NEXT: SI Whole Quad Mode
337-
; GCN-O1-NEXT: Virtual Register Map
338-
; GCN-O1-NEXT: Live Register Matrix
339-
; GCN-O1-NEXT: SI Pre-allocate WWM Registers
340339
; GCN-O1-NEXT: SI optimize exec mask operations pre-RA
341340
; GCN-O1-NEXT: AMDGPU Pre-RA Long Branch Reg
342341
; GCN-O1-NEXT: Machine Natural Loop Construction
@@ -354,6 +353,7 @@
354353
; GCN-O1-NEXT: SI lower SGPR spill instructions
355354
; GCN-O1-NEXT: Virtual Register Map
356355
; GCN-O1-NEXT: Live Register Matrix
356+
; GCN-O1-NEXT: SI Pre-allocate WWM Registers
357357
; GCN-O1-NEXT: Greedy Register Allocator
358358
; GCN-O1-NEXT: SI Lower WWM Copies
359359
; GCN-O1-NEXT: GCN NSA Reassign
@@ -625,9 +625,6 @@
625625
; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA optimizations
626626
; GCN-O1-OPTS-NEXT: Machine Instruction Scheduler
627627
; GCN-O1-OPTS-NEXT: SI Whole Quad Mode
628-
; GCN-O1-OPTS-NEXT: Virtual Register Map
629-
; GCN-O1-OPTS-NEXT: Live Register Matrix
630-
; GCN-O1-OPTS-NEXT: SI Pre-allocate WWM Registers
631628
; GCN-O1-OPTS-NEXT: SI optimize exec mask operations pre-RA
632629
; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA Long Branch Reg
633630
; GCN-O1-OPTS-NEXT: Machine Natural Loop Construction
@@ -645,6 +642,7 @@
645642
; GCN-O1-OPTS-NEXT: SI lower SGPR spill instructions
646643
; GCN-O1-OPTS-NEXT: Virtual Register Map
647644
; GCN-O1-OPTS-NEXT: Live Register Matrix
645+
; GCN-O1-OPTS-NEXT: SI Pre-allocate WWM Registers
648646
; GCN-O1-OPTS-NEXT: Greedy Register Allocator
649647
; GCN-O1-OPTS-NEXT: SI Lower WWM Copies
650648
; GCN-O1-OPTS-NEXT: GCN NSA Reassign
@@ -927,9 +925,6 @@
927925
; GCN-O2-NEXT: AMDGPU Pre-RA optimizations
928926
; GCN-O2-NEXT: Machine Instruction Scheduler
929927
; GCN-O2-NEXT: SI Whole Quad Mode
930-
; GCN-O2-NEXT: Virtual Register Map
931-
; GCN-O2-NEXT: Live Register Matrix
932-
; GCN-O2-NEXT: SI Pre-allocate WWM Registers
933928
; GCN-O2-NEXT: SI optimize exec mask operations pre-RA
934929
; GCN-O2-NEXT: SI Form memory clauses
935930
; GCN-O2-NEXT: AMDGPU Pre-RA Long Branch Reg
@@ -948,6 +943,7 @@
948943
; GCN-O2-NEXT: SI lower SGPR spill instructions
949944
; GCN-O2-NEXT: Virtual Register Map
950945
; GCN-O2-NEXT: Live Register Matrix
946+
; GCN-O2-NEXT: SI Pre-allocate WWM Registers
951947
; GCN-O2-NEXT: Greedy Register Allocator
952948
; GCN-O2-NEXT: SI Lower WWM Copies
953949
; GCN-O2-NEXT: GCN NSA Reassign
@@ -1242,9 +1238,6 @@
12421238
; GCN-O3-NEXT: AMDGPU Pre-RA optimizations
12431239
; GCN-O3-NEXT: Machine Instruction Scheduler
12441240
; GCN-O3-NEXT: SI Whole Quad Mode
1245-
; GCN-O3-NEXT: Virtual Register Map
1246-
; GCN-O3-NEXT: Live Register Matrix
1247-
; GCN-O3-NEXT: SI Pre-allocate WWM Registers
12481241
; GCN-O3-NEXT: SI optimize exec mask operations pre-RA
12491242
; GCN-O3-NEXT: SI Form memory clauses
12501243
; GCN-O3-NEXT: AMDGPU Pre-RA Long Branch Reg
@@ -1263,6 +1256,7 @@
12631256
; GCN-O3-NEXT: SI lower SGPR spill instructions
12641257
; GCN-O3-NEXT: Virtual Register Map
12651258
; GCN-O3-NEXT: Live Register Matrix
1259+
; GCN-O3-NEXT: SI Pre-allocate WWM Registers
12661260
; GCN-O3-NEXT: Greedy Register Allocator
12671261
; GCN-O3-NEXT: SI Lower WWM Copies
12681262
; GCN-O3-NEXT: GCN NSA Reassign

llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
; DEFAULT-NEXT: SI lower SGPR spill instructions
2121
; DEFAULT-NEXT: Virtual Register Map
2222
; DEFAULT-NEXT: Live Register Matrix
23+
; DEFAULT-NEXT: SI Pre-allocate WWM Registers
2324
; DEFAULT-NEXT: Greedy Register Allocator
2425
; DEFAULT-NEXT: SI Lower WWM Copies
2526
; DEFAULT-NEXT: GCN NSA Reassign
@@ -28,6 +29,11 @@
2829

2930
; O0: Fast Register Allocator
3031
; O0-NEXT: SI lower SGPR spill instructions
32+
; O0-NEXT: Slot index numbering
33+
; O0-NEXT: Live Interval Analysis
34+
; O0-NEXT: Virtual Register Map
35+
; O0-NEXT: Live Register Matrix
36+
; O0-NEXT: SI Pre-allocate WWM Registers
3137
; O0-NEXT: Fast Register Allocator
3238
; O0-NEXT: SI Lower WWM Copies
3339
; O0-NEXT: SI Fix VGPR copies
@@ -46,6 +52,7 @@
4652
; BASIC-DEFAULT-NEXT: SI lower SGPR spill instructions
4753
; BASIC-DEFAULT-NEXT: Virtual Register Map
4854
; BASIC-DEFAULT-NEXT: Live Register Matrix
55+
; BASIC-DEFAULT-NEXT: SI Pre-allocate WWM Registers
4956
; BASIC-DEFAULT-NEXT: Bundle Machine CFG Edges
5057
; BASIC-DEFAULT-NEXT: Spill Code Placement Analysis
5158
; BASIC-DEFAULT-NEXT: Lazy Machine Block Frequency Analysis
@@ -63,6 +70,7 @@
6370
; DEFAULT-BASIC-NEXT: SI lower SGPR spill instructions
6471
; DEFAULT-BASIC-NEXT: Virtual Register Map
6572
; DEFAULT-BASIC-NEXT: Live Register Matrix
73+
; DEFAULT-BASIC-NEXT: SI Pre-allocate WWM Registers
6674
; DEFAULT-BASIC-NEXT: Basic Register Allocator
6775
; DEFAULT-BASIC-NEXT: SI Lower WWM Copies
6876
; DEFAULT-BASIC-NEXT: GCN NSA Reassign
@@ -82,6 +90,7 @@
8290
; BASIC-BASIC-NEXT: SI lower SGPR spill instructions
8391
; BASIC-BASIC-NEXT: Virtual Register Map
8492
; BASIC-BASIC-NEXT: Live Register Matrix
93+
; BASIC-BASIC-NEXT: SI Pre-allocate WWM Registers
8594
; BASIC-BASIC-NEXT: Basic Register Allocator
8695
; BASIC-BASIC-NEXT: SI Lower WWM Copies
8796
; BASIC-BASIC-NEXT: GCN NSA Reassign

0 commit comments

Comments
 (0)