Skip to content

Commit 357ce67

Browse files
cdevadas authored and Makarand Maydeo committed
[AMDGPU][SILowerSGPRSpills] Add fallback path to legacy sgpr spill handling
Added the command-line switch `amdgpu-legacy-sgpr-spill-lowering` to enable the legacy spill-lowering method. This flag is turned off by default, as the new spill-handling method is preferred in the compiler. The switch is only temporary and can be used when the default spill method produces incorrect codegen; it will be removed once the new spill-handling path is fully functional and handles all corner cases. Change-Id: Ib9a1e470c3e8cdc1a595894af625b19952012c97 (cherry picked from commit 9d865e6c9a48d6e3f422d8e3d3b78a958cc206e9)
1 parent 923882e commit 357ce67

File tree

1 file changed

+109
-57
lines changed

1 file changed

+109
-57
lines changed

llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp

Lines changed: 109 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,11 @@ using namespace llvm;
3030

3131
using MBBVector = SmallVector<MachineBasicBlock *, 4>;
3232

33+
static cl::opt<bool>
34+
LegacySGPRSpillLowering("amdgpu-legacy-sgpr-spill-lowering",
35+
cl::desc("Enable the legacy SGPR spill lowering"),
36+
cl::ReallyHidden, cl::init(false));
37+
3338
namespace {
3439

3540
class SILowerSGPRSpills : public MachineFunctionPass {
@@ -54,6 +59,12 @@ class SILowerSGPRSpills : public MachineFunctionPass {
5459
SmallVectorImpl<int> &CalleeSavedFIs);
5560
void extendWWMVirtRegLiveness(MachineFunction &MF, LiveIntervals *LIS);
5661

62+
void legacySpillLowering(MachineFunction &MF, BitVector &SpillFIs,
63+
bool &NewReservedRegs);
64+
void lowerSpills(MachineFunction &MF, SmallVectorImpl<int> &CalleeSavedFIs,
65+
BitVector &SpillFIs, bool &NewReservedRegs,
66+
bool &SpilledToVirtVGPRLanes);
67+
5768
bool runOnMachineFunction(MachineFunction &MF) override;
5869

5970
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -298,6 +309,93 @@ void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF,
298309
}
299310
}
300311

312+
// The fallback legacy spill method.
313+
void SILowerSGPRSpills::legacySpillLowering(MachineFunction &MF,
314+
BitVector &SpillFIs,
315+
bool &NewReservedRegs) {
316+
MachineFrameInfo &MFI = MF.getFrameInfo();
317+
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
318+
for (MachineBasicBlock &MBB : MF) {
319+
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
320+
if (!TII->isSGPRSpill(MI))
321+
continue;
322+
323+
int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
324+
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
325+
326+
if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI, true)) {
327+
NewReservedRegs = true;
328+
bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
329+
MI, FI, nullptr, Indexes, LIS, true);
330+
if (!Spilled)
331+
llvm_unreachable(
332+
"failed to spill SGPR to physical VGPR lane when allocated");
333+
}
334+
SpillFIs.set(FI);
335+
}
336+
}
337+
}
338+
339+
// The improved method that spills sgprs into lanes of virtual vgprs. The
340+
// regalloc will efficiently allocate them with physical registers.
341+
void SILowerSGPRSpills::lowerSpills(MachineFunction &MF,
342+
SmallVectorImpl<int> &CalleeSavedFIs,
343+
BitVector &SpillFIs, bool &NewReservedRegs,
344+
bool &SpilledToVirtVGPRLanes) {
345+
MachineFrameInfo &MFI = MF.getFrameInfo();
346+
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
347+
for (MachineBasicBlock &MBB : MF) {
348+
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
349+
if (!TII->isSGPRSpill(MI))
350+
continue;
351+
352+
int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
353+
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
354+
355+
bool IsCalleeSaveSGPRSpill = llvm::is_contained(CalleeSavedFIs, FI);
356+
if (IsCalleeSaveSGPRSpill) {
357+
// Spill callee-saved SGPRs into physical VGPR lanes.
358+
359+
// TODO: This is to ensure the CFIs are static for efficient frame
360+
// unwinding in the debugger. Spilling them into virtual VGPR lanes
361+
// involves regalloc to allocate the physical VGPRs and that might
362+
// cause intermediate spill/split of such liveranges for successful
363+
// allocation. This would result in broken CFI encoding unless the
364+
// regalloc aware CFI generation to insert new CFIs along with the
365+
// intermediate spills is implemented. No such support
366+
// currently exists in the LLVM compiler.
367+
if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI, true)) {
368+
NewReservedRegs = true;
369+
bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
370+
MI, FI, nullptr, Indexes, LIS, true);
371+
if (!Spilled)
372+
llvm_unreachable(
373+
"failed to spill SGPR to physical VGPR lane when allocated");
374+
}
375+
} else {
376+
if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
377+
bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
378+
MI, FI, nullptr, Indexes, LIS);
379+
if (!Spilled)
380+
llvm_unreachable(
381+
"failed to spill SGPR to virtual VGPR lane when allocated");
382+
SpillFIs.set(FI);
383+
SpilledToVirtVGPRLanes = true;
384+
}
385+
}
386+
}
387+
}
388+
389+
if (SpilledToVirtVGPRLanes) {
390+
extendWWMVirtRegLiveness(MF, LIS);
391+
if (LIS) {
392+
// Compute the LiveInterval for the newly created virtual registers.
393+
for (auto Reg : FuncInfo->getSGPRSpillVGPRs())
394+
LIS->createAndComputeVirtRegInterval(Reg);
395+
}
396+
}
397+
}
398+
301399
bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
302400
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
303401
TII = ST.getInstrInfo();
@@ -330,8 +428,8 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
330428

331429
// TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
332430
// handled as SpilledToReg in regular PrologEpilogInserter.
333-
const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
334-
(HasCSRs || FuncInfo->hasSpilledSGPRs());
431+
const bool HasSGPRSpillToVGPR =
432+
TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs());
335433
if (HasSGPRSpillToVGPR) {
336434
// Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
337435
// are spilled to VGPRs, in which case we can eliminate the stack usage.
@@ -342,58 +440,13 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
342440
// To track the spill frame indices handled in this pass.
343441
BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
344442

345-
for (MachineBasicBlock &MBB : MF) {
346-
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
347-
if (!TII->isSGPRSpill(MI))
348-
continue;
349-
350-
int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
351-
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
352-
353-
bool IsCalleeSaveSGPRSpill =
354-
std::find(CalleeSavedFIs.begin(), CalleeSavedFIs.end(), FI) !=
355-
CalleeSavedFIs.end();
356-
if (IsCalleeSaveSGPRSpill) {
357-
// Spill callee-saved SGPRs into physical VGPR lanes.
358-
359-
// TODO: This is to ensure the CFIs are static for efficient frame
360-
// unwinding in the debugger. Spilling them into virtual VGPR lanes
361-
// involve regalloc to allocate the physical VGPRs and that might
362-
// cause intermediate spill/split of such liveranges for successful
363-
// allocation. This would result in broken CFI encoding unless the
364-
// regalloc aware CFI generation to insert new CFIs along with the
365-
// intermediate spills is implemented. There is no such support
366-
// currently exist in the LLVM compiler.
367-
if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI, true)) {
368-
NewReservedRegs = true;
369-
bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
370-
MI, FI, nullptr, Indexes, LIS, true);
371-
if (!Spilled)
372-
llvm_unreachable(
373-
"failed to spill SGPR to physical VGPR lane when allocated");
374-
}
375-
} else {
376-
if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
377-
bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
378-
MI, FI, nullptr, Indexes, LIS);
379-
if (!Spilled)
380-
llvm_unreachable(
381-
"failed to spill SGPR to virtual VGPR lane when allocated");
382-
SpillFIs.set(FI);
383-
SpilledToVirtVGPRLanes = true;
384-
}
385-
}
386-
}
387-
}
388-
389-
if (SpilledToVirtVGPRLanes) {
390-
extendWWMVirtRegLiveness(MF, LIS);
391-
if (LIS) {
392-
// Compute the LiveInterval for the newly created virtual registers.
393-
for (auto Reg : FuncInfo->getSGPRSpillVGPRs())
394-
LIS->createAndComputeVirtRegInterval(Reg);
395-
}
396-
}
443+
// LegacySGPRSpillLowering switch is recommended to use only as a fallback
444+
// method when the new spill lowering causes any runtime issues.
445+
if (LegacySGPRSpillLowering)
446+
legacySpillLowering(MF, SpillFIs, NewReservedRegs);
447+
else
448+
lowerSpills(MF, CalleeSavedFIs, SpillFIs, NewReservedRegs,
449+
SpilledToVirtVGPRLanes);
397450

398451
for (MachineBasicBlock &MBB : MF) {
399452
// FIXME: The dead frame indices are replaced with a null register from
@@ -426,9 +479,8 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
426479
MadeChange = true;
427480
}
428481

429-
if (SpilledToVirtVGPRLanes) {
430-
const TargetRegisterClass *RC =
431-
ST.isWave32() ? &AMDGPU::SGPR_32RegClass : &AMDGPU::SGPR_64RegClass;
482+
if (!LegacySGPRSpillLowering && SpilledToVirtVGPRLanes) {
483+
const TargetRegisterClass *RC = TRI->getWaveMaskRegClass();
432484
// Shift back the reserved SGPR for EXEC copy into the lowest range.
433485
// This SGPR is reserved to handle the whole-wave spill/copy operations
434486
// that might get inserted during vgpr regalloc.

0 commit comments

Comments
 (0)