Skip to content

Commit 7b6c5d2

Browse files
committed
AMDGPU: Don't add emergency stack slot if all spills are SGPR->VGPR
This should avoid reporting that any stack needs to be allocated in the case where no stack is truly used. An unused stack slot is still left around in other cases where there are real stack objects but no spilling occurs. llvm-svn: 295891
1 parent 639d7b6 commit 7b6c5d2

File tree

2 files changed

+57
-39
lines changed

2 files changed

+57
-39
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 55 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,16 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
383383

384384
}
385385

386+
// Returns true when MFI.isDeadObjectIndex() holds for every frame index in
// [MFI.getObjectIndexBegin(), MFI.getObjectIndexEnd()), and false as soon as
// one index is not dead. An empty index range yields true.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
387+
for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
388+
I != E; ++I) {
389+
if (!MFI.isDeadObjectIndex(I))
390+
return false;
391+
}
392+
393+
return true;
394+
}
395+
386396
void SIFrameLowering::processFunctionBeforeFrameFinalized(
387397
MachineFunction &MF,
388398
RegScavenger *RS) const {
@@ -391,8 +401,51 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
391401
if (!MFI.hasStackObjects())
392402
return;
393403

394-
bool MayNeedScavengingEmergencySlot = MFI.hasStackObjects();
395-
if (MayNeedScavengingEmergencySlot) {
404+
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
405+
const SIInstrInfo *TII = ST.getInstrInfo();
406+
const SIRegisterInfo &TRI = TII->getRegisterInfo();
407+
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
408+
bool AllSGPRSpilledToVGPRs = false;
409+
410+
if (TRI.spillSGPRToVGPR() && FuncInfo->hasSpilledSGPRs()) {
411+
AllSGPRSpilledToVGPRs = true;
412+
413+
// Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
414+
// are spilled to VGPRs, in which case we can eliminate the stack usage.
415+
//
416+
// XXX - This operates under the assumption that only other SGPR spills are
417+
// users of the frame index. I'm not 100% sure this is correct. The
418+
// StackColoring pass has a comment saying a future improvement would be to
419+
// merge allocas with spill slots, but for now according to
420+
// MachineFrameInfo isSpillSlot can't alias any other object.
421+
for (MachineBasicBlock &MBB : MF) {
422+
MachineBasicBlock::iterator Next;
423+
for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
424+
MachineInstr &MI = *I;
425+
Next = std::next(I);
426+
427+
if (TII->isSGPRSpill(MI)) {
428+
int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
429+
if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
430+
bool Spilled = TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
431+
(void)Spilled;
432+
assert(Spilled && "failed to spill SGPR to VGPR when allocated");
433+
} else
434+
AllSGPRSpilledToVGPRs = false;
435+
}
436+
}
437+
}
438+
439+
FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
440+
}
441+
442+
// FIXME: The other checks should be redundant with allStackObjectsAreDead,
443+
// but currently hasNonSpillStackObjects is set only from source
444+
// allocas. Stack temps produced from legalization are not counted currently.
445+
if (FuncInfo->hasNonSpillStackObjects() || FuncInfo->hasSpilledVGPRs() ||
446+
!AllSGPRSpilledToVGPRs || !allStackObjectsAreDead(MFI)) {
447+
assert(RS && "RegScavenger required if spilling");
448+
396449
// We force this to be at offset 0 so no user object ever has 0 as an
397450
// address, so we may use 0 as an invalid pointer value. This is because
398451
// LLVM assumes 0 is an invalid pointer in address space 0. Because alloca
@@ -410,40 +463,6 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
410463
AMDGPU::SGPR_32RegClass.getSize(), 0, false);
411464
RS->addScavengingFrameIndex(ScavengeFI);
412465
}
413-
414-
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
415-
const SIInstrInfo *TII = ST.getInstrInfo();
416-
const SIRegisterInfo &TRI = TII->getRegisterInfo();
417-
if (!TRI.spillSGPRToVGPR())
418-
return;
419-
420-
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
421-
if (!FuncInfo->hasSpilledSGPRs())
422-
return;
423-
424-
// Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
425-
// are spilled to VGPRs, in which case we can eliminate the stack usage.
426-
//
427-
// XXX - This operates under the assumption that only other SGPR spills are
428-
// users of the frame index. I'm not 100% sure this is correct. The
429-
// StackColoring pass has a comment saying a future improvement would be to
430-
// merging of allocas with spill slots, but for now according to
431-
// MachineFrameInfo isSpillSlot can't alias any other object.
432-
for (MachineBasicBlock &MBB : MF) {
433-
MachineBasicBlock::iterator Next;
434-
for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
435-
MachineInstr &MI = *I;
436-
Next = std::next(I);
437-
438-
if (TII->isSGPRSpill(MI)) {
439-
int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
440-
if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI))
441-
TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
442-
}
443-
}
444-
}
445-
446-
FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
447466
}
448467

449468
void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,

llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,12 @@
1818
; GCN: s_mov_b32 m0
1919

2020
; Make sure scratch space isn't being used for SGPR->VGPR spills
21-
; FIXME: Seem to be leaving behind unused emergency slot.
2221

2322
; Writing to M0 from an SMRD instruction will hang the GPU.
2423
; GCN-NOT: s_buffer_load_dword m0
2524
; GCN: s_endpgm
2625

27-
; TOVGPR: ScratchSize: 4{{$}}
26+
; TOVGPR: ScratchSize: 0{{$}}
2827
define amdgpu_ps void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
2928
main_body:
3029
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
@@ -768,7 +767,7 @@ ENDIF66: ; preds = %LOOP65
768767

769768
; GCN-LABEL: {{^}}main1:
770769
; GCN: s_endpgm
771-
; TOVGPR: ScratchSize: 4{{$}}
770+
; TOVGPR: ScratchSize: 0{{$}}
772771
define amdgpu_ps void @main1([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
773772
main_body:
774773
%tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0

0 commit comments

Comments
 (0)