Skip to content

Commit c10cc4e

Browse files
committed
[AMDGPU] Fix computing live registers in prolog
ScratchExecCopy needs to be marked as live, we cannot use that register while EXEC is stored in there. Marking SGPRForFPSaveRestoreCopy and SGPRForBPSaveRestoreCopy as available is unnecessary, they should not be live at that point anway. Differential Revision: https://reviews.llvm.org/D100098
1 parent 6fccfd7 commit c10cc4e

File tree

2 files changed

+5
-12
lines changed

2 files changed

+5
-12
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -803,18 +803,12 @@ static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
803803
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
804804
const SIInstrInfo *TII = ST.getInstrInfo();
805805
const SIRegisterInfo &TRI = TII->getRegisterInfo();
806-
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
807806
DebugLoc DL;
808807

809808
if (LiveRegs.empty()) {
810809
if (IsProlog) {
811810
LiveRegs.init(TRI);
812811
LiveRegs.addLiveIns(MBB);
813-
if (FuncInfo->SGPRForFPSaveRestoreCopy)
814-
LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
815-
816-
if (FuncInfo->SGPRForBPSaveRestoreCopy)
817-
LiveRegs.removeReg(FuncInfo->SGPRForBPSaveRestoreCopy);
818812
} else {
819813
// In epilog.
820814
LiveRegs.init(*ST.getRegisterInfo());
@@ -828,8 +822,7 @@ static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
828822
if (!ScratchExecCopy)
829823
report_fatal_error("failed to find free scratch register");
830824

831-
if (!IsProlog)
832-
LiveRegs.removeReg(ScratchExecCopy);
825+
LiveRegs.addReg(ScratchExecCopy);
833826

834827
const unsigned OrSaveExec =
835828
ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;

llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,8 @@ body: |
7979
; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill
8080
; GFX9-FLATSCR: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
8181
; GFX9-FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
82-
; GFX9-FLATSCR: $sgpr4 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
83-
; GFX9-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5)
82+
; GFX9-FLATSCR: $sgpr6 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
83+
; GFX9-FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.3, addrspace 5)
8484
; GFX9-FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5
8585
; GFX9-FLATSCR: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
8686
; GFX9-FLATSCR: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 8191, implicit-def $scc
@@ -92,8 +92,8 @@ body: |
9292
; GFX9-FLATSCR: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 24576, implicit-def $scc
9393
; GFX9-FLATSCR: $sgpr33 = V_READLANE_B32 $vgpr2, 0
9494
; GFX9-FLATSCR: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
95-
; GFX9-FLATSCR: $sgpr4 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
96-
; GFX9-FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr4, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.3, addrspace 5)
95+
; GFX9-FLATSCR: $sgpr6 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
96+
; GFX9-FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.3, addrspace 5)
9797
; GFX9-FLATSCR: $exec = S_MOV_B64 killed $sgpr4_sgpr5
9898
; GFX9-FLATSCR: S_ENDPGM 0, csr_amdgpu_allvgprs
9999
$vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec

0 commit comments

Comments
 (0)