-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Track physical VGPRs used for SGPR spills #75573
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Carl Ritson (perlfu) Changes: Physical VGPRs used for SGPR spills need to be tracked independently of WWM reserved registers. The WWM reserved set contains extra registers allocated during the WWM pre-allocation pass. This causes SGPR spills allocated after WWM pre-allocation to overlap with WWM register usage, e.g. if the frame pointer is spilled during prologue/epilogue insertion. Full diff: https://github.com/llvm/llvm-project/pull/75573.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 48c341917ddec7..e8142244b7db69 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -349,8 +349,9 @@ bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
MBB.addLiveIn(LaneVGPR);
MBB.sortUniqueLiveIns();
}
+ SpillPhysVGPRs.push_back(LaneVGPR);
} else {
- LaneVGPR = WWMReservedRegs.back();
+ LaneVGPR = SpillPhysVGPRs.back();
}
SGPRSpillsToPhysicalVGPRLanes[FI].push_back(
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 7ff50c80081d30..dc63ae44c528db 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -502,6 +502,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
unsigned NumVirtualVGPRSpillLanes = 0;
unsigned NumPhysicalVGPRSpillLanes = 0;
SmallVector<Register, 2> SpillVGPRs;
+ SmallVector<Register, 2> SpillPhysVGPRs;
using WWMSpillsMap = MapVector<Register, int>;
// To track the registers used in instructions that can potentially modify the
// inactive lanes. The WWM instructions and the writelane instructions for
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
new file mode 100644
index 00000000000000..1473e667f894cd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
@@ -0,0 +1,228 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=amdgcn-unknown-amdpal -mcpu=gfx1030 -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs --stress-regalloc=2 -o - %s | FileCheck -check-prefix GCN %s
+
+--- |
+ define amdgpu_gfx [13 x i32] @test_main() #0 {
+ ret [13 x i32] poison
+ }
+
+ attributes #0 = { alwaysinline nounwind memory(readwrite) "amdgpu-flat-work-group-size"="32,32" "amdgpu-memory-bound"="false" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "denormal-fp-math-f32"="preserve-sign" "target-cpu"="gfx1030" "target-features"=",+wavefrontsize32,+cumode,+enable-flat-scratch" "uniform-work-group-size"="false" }
+...
+---
+
+name: test_main
+tracksRegLiveness: true
+frameInfo:
+ hasCalls: true
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
+ stack-id: sgpr-spill, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+ hasSpilledSGPRs: true
+ hasSpilledVGPRs: false
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ sgprForEXECCopy: '$sgpr105'
+body: |
+ ; GCN-LABEL: name: test_main
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr5
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $sgpr0 = COPY $sgpr33
+ ; GCN-NEXT: $sgpr33 = frame-setup COPY $sgpr32
+ ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5)
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5)
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5)
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5)
+ ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr0, 4, undef $vgpr3
+ ; GCN-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24, implicit-def dead $scc
+ ; GCN-NEXT: renamable $vgpr5 = IMPLICIT_DEF
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr12, 8, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr13, 9, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr14, 10, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr15, 11, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr16, 12, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr17, 13, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr18, 14, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr19, 15, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr20, 16, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr21, 17, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr22, 18, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr23, 19, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr24, 20, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr25, 21, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr26, 22, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr27, 23, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr28, 24, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr29, 25, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr30, 26, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr31, 27, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr64, 28, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr65, 29, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr66, 30, $vgpr1
+ ; GCN-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr67, 31, $vgpr1
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr68, 0, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr69, 1, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr70, 2, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr71, 3, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr72, 4, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr73, 5, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr74, 6, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr75, 7, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr76, 8, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr77, 9, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr78, 10, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr79, 11, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr80, 12, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr81, 13, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr82, 14, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr83, 15, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr84, 16, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr85, 17, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr86, 18, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr87, 19, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr88, 20, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr89, 21, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr90, 22, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr91, 23, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr92, 24, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr93, 25, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr94, 26, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr95, 27, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr96, 28, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr97, 29, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr98, 30, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr99, 31, $vgpr2
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr100, 0, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr101, 1, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr102, 2, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr103, 3, $vgpr3
+ ; GCN-NEXT: $sgpr22 = IMPLICIT_DEF
+ ; GCN-NEXT: renamable $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr22, 0, killed $vgpr5
+ ; GCN-NEXT: dead $vgpr4 = V_SET_INACTIVE_B32 $vgpr0, 0, implicit $exec, implicit-def $scc
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.2(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr1, $vgpr2, $vgpr3, $vgpr5
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: successors: %bb.3(0x80000000)
+ ; GCN-NEXT: liveins: $vgpr1, $vgpr2, $vgpr3, $vgpr5
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.3:
+ ; GCN-NEXT: liveins: $vgpr1, $vgpr2, $vgpr3, $vgpr5
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $sgpr103 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 3
+ ; GCN-NEXT: $sgpr102 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 2
+ ; GCN-NEXT: $sgpr101 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 1
+ ; GCN-NEXT: $sgpr100 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 0
+ ; GCN-NEXT: $sgpr99 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 31
+ ; GCN-NEXT: $sgpr98 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 30
+ ; GCN-NEXT: $sgpr97 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 29
+ ; GCN-NEXT: $sgpr96 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 28
+ ; GCN-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 27
+ ; GCN-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 26
+ ; GCN-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 25
+ ; GCN-NEXT: $sgpr92 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 24
+ ; GCN-NEXT: $sgpr91 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 23
+ ; GCN-NEXT: $sgpr90 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 22
+ ; GCN-NEXT: $sgpr89 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 21
+ ; GCN-NEXT: $sgpr88 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 20
+ ; GCN-NEXT: $sgpr87 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 19
+ ; GCN-NEXT: $sgpr86 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 18
+ ; GCN-NEXT: $sgpr85 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 17
+ ; GCN-NEXT: $sgpr84 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 16
+ ; GCN-NEXT: $sgpr83 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 15
+ ; GCN-NEXT: $sgpr82 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 14
+ ; GCN-NEXT: $sgpr81 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 13
+ ; GCN-NEXT: $sgpr80 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 12
+ ; GCN-NEXT: $sgpr79 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 11
+ ; GCN-NEXT: $sgpr78 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 10
+ ; GCN-NEXT: $sgpr77 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 9
+ ; GCN-NEXT: $sgpr76 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 8
+ ; GCN-NEXT: $sgpr75 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 7
+ ; GCN-NEXT: $sgpr74 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 6
+ ; GCN-NEXT: $sgpr73 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 5
+ ; GCN-NEXT: $sgpr72 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 4
+ ; GCN-NEXT: $sgpr71 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 3
+ ; GCN-NEXT: $sgpr70 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 2
+ ; GCN-NEXT: $sgpr69 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1
+ ; GCN-NEXT: $sgpr68 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
+ ; GCN-NEXT: $sgpr67 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 31
+ ; GCN-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 30
+ ; GCN-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 29
+ ; GCN-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 28
+ ; GCN-NEXT: $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 27
+ ; GCN-NEXT: $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 26
+ ; GCN-NEXT: $sgpr29 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 25
+ ; GCN-NEXT: $sgpr28 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 24
+ ; GCN-NEXT: $sgpr27 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 23
+ ; GCN-NEXT: $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 22
+ ; GCN-NEXT: $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 21
+ ; GCN-NEXT: $sgpr24 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 20
+ ; GCN-NEXT: $sgpr23 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 19
+ ; GCN-NEXT: $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 18
+ ; GCN-NEXT: $sgpr21 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 17
+ ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 16
+ ; GCN-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 15
+ ; GCN-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 14
+ ; GCN-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 13
+ ; GCN-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 12
+ ; GCN-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 11
+ ; GCN-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 10
+ ; GCN-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 9
+ ; GCN-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 8
+ ; GCN-NEXT: $sgpr11 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 7
+ ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 6
+ ; GCN-NEXT: $sgpr9 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 5
+ ; GCN-NEXT: $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 4
+ ; GCN-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 3
+ ; GCN-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 2
+ ; GCN-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 1
+ ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 0
+ ; GCN-NEXT: KILL killed renamable $vgpr5
+ ; GCN-NEXT: $sgpr0 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 4
+ ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; GCN-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5)
+ ; GCN-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5)
+ ; GCN-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5)
+ ; GCN-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5)
+ ; GCN-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.74, addrspace 5)
+ ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
+ ; GCN-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24, implicit-def dead $scc
+ ; GCN-NEXT: $sgpr33 = COPY $sgpr0
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0
+
+ $sgpr22 = IMPLICIT_DEF
+ SI_SPILL_S32_SAVE $sgpr22, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
+ %0:vgpr_32 = V_SET_INACTIVE_B32 $vgpr0, 0, implicit $exec, implicit-def $scc
+
+ bb.1:
+ KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
+
+ bb.2:
+ renamable $sgpr22 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
+
+ bb.3:
+ S_ENDPGM 0
+
+...
|
Shows spill allocation overlapping with WWM register use.
Physical VGPRs used for SGPR spills need to be tracked independently of WWM reserved registers. The WWM reserved set contains extra registers allocated during the WWM pre-allocation pass. This causes SGPR spills allocated after WWM pre-allocation to overlap with WWM register usage, e.g. if the frame pointer is spilled during prologue/epilogue insertion.
bf0b3cd
to
cf516fe
Compare
Physical VGPRs used for SGPR spills need to be tracked independently of WWM reserved registers. The WWM reserved set contains extra registers allocated during the WWM pre-allocation pass.
This causes SGPR spills allocated after WWM pre-allocation to overlap with WWM register usage, e.g. if the frame pointer is spilled during prologue/epilogue insertion.