Skip to content

Commit 236f860

Browse files
cdevadas, David Salinas
authored and committed
[AMDGPU] Pick available high VGPR for CSR SGPR spilling (llvm#78669)
CSR SGPR spilling currently uses the earliest (lowest-numbered) available physical VGPRs. This imposes high register pressure when trying to allocate large VGPR tuples within the default register budget. This patch changes the spilling strategy to pick VGPRs in reverse order, taking the highest available VGPR first, and then, after register allocation, shifts them back to the lowest available range. With that, the initial VGPRs remain available for allocation, and the chance of finding a large number of contiguous registers is higher. Change-Id: Ib22d02f3dd5255a5b9157099acb91b779d3d1fc0
1 parent ea5f6b0 commit 236f860

32 files changed

+4342
-3713
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,8 @@ static void getVGPRSpillLaneOrTempRegister(
169169
TargetStackID::SGPRSpill);
170170

171171
if (TRI->spillSGPRToVGPR() &&
172-
MFI->allocateSGPRSpillToVGPRLane(MF, FI, /* IsPrologEpilog */ true)) {
172+
MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true,
173+
/*IsPrologEpilog=*/true)) {
173174
// 2: There's no free lane to spill, and no free register to save the
174175
// SGPR, so we're forced to take another VGPR to use for the spill.
175176
MFI->addToPrologEpilogSGPRSpills(
@@ -1844,6 +1845,8 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
18441845
if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
18451846
return;
18461847

1848+
MFI->shiftSpillPhysVGPRsToLowestRange(MF);
1849+
18471850
TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
18481851
if (MFI->isEntryFunction())
18491852
return;

llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -365,8 +365,8 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
365365
// regalloc aware CFI generation to insert new CFIs along with the
366366
// intermediate spills is implemented. No such support
367367
// currently exists in the LLVM compiler.
368-
if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI, true)) {
369-
NewReservedRegs = true;
368+
if (FuncInfo->allocateSGPRSpillToVGPRLane(
369+
MF, FI, /*SpillToPhysVGPRLane=*/true)) {
370370
bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
371371
MI, FI, nullptr, Indexes, LIS, true);
372372
if (!Spilled)

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,33 @@ bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
312312
return false;
313313
}
314314

315+
void SIMachineFunctionInfo::shiftSpillPhysVGPRsToLowestRange(
316+
MachineFunction &MF) {
317+
const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
318+
MachineRegisterInfo &MRI = MF.getRegInfo();
319+
for (unsigned I = 0, E = SpillPhysVGPRs.size(); I < E; ++I) {
320+
Register Reg = SpillPhysVGPRs[I];
321+
Register NewReg =
322+
TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
323+
if (!NewReg || NewReg >= Reg)
324+
break;
325+
326+
MRI.replaceRegWith(Reg, NewReg);
327+
328+
// Update various tables with the new VGPR.
329+
SpillPhysVGPRs[I] = NewReg;
330+
WWMReservedRegs.remove(Reg);
331+
WWMReservedRegs.insert(NewReg);
332+
WWMSpills.insert(std::make_pair(NewReg, WWMSpills[Reg]));
333+
WWMSpills.erase(Reg);
334+
335+
for (MachineBasicBlock &MBB : MF) {
336+
MBB.removeLiveIn(Reg);
337+
MBB.sortUniqueLiveIns();
338+
}
339+
}
340+
}
341+
315342
bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
316343
MachineFunction &MF, int FI, unsigned LaneIndex) {
317344
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -329,13 +356,17 @@ bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
329356
}
330357

331358
bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
332-
MachineFunction &MF, int FI, unsigned LaneIndex) {
359+
MachineFunction &MF, int FI, unsigned LaneIndex, bool IsPrologEpilog) {
333360
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
334361
const SIRegisterInfo *TRI = ST.getRegisterInfo();
335362
MachineRegisterInfo &MRI = MF.getRegInfo();
336363
Register LaneVGPR;
337364
if (!LaneIndex) {
338-
LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
365+
// Find the highest available register if called before RA to ensure the
366+
// lowest registers are available for allocation. The LaneVGPR, in that
367+
// case, will be shifted back to the lowest range after VGPR allocation.
368+
LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF,
369+
!IsPrologEpilog);
339370
if (LaneVGPR == AMDGPU::NoRegister) {
340371
// We have no VGPRs left for spilling SGPRs. Reset because we will not
341372
// partially spill the SGPR to VGPRs.
@@ -359,12 +390,12 @@ bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
359390
return true;
360391
}
361392

362-
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF,
363-
int FI,
364-
bool IsPrologEpilog) {
393+
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(
394+
MachineFunction &MF, int FI, bool SpillToPhysVGPRLane,
395+
bool IsPrologEpilog) {
365396
std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
366-
IsPrologEpilog ? SGPRSpillsToPhysicalVGPRLanes[FI]
367-
: SGPRSpillsToVirtualVGPRLanes[FI];
397+
SpillToPhysVGPRLane ? SGPRSpillsToPhysicalVGPRLanes[FI]
398+
: SGPRSpillsToVirtualVGPRLanes[FI];
368399

369400
// This has already been allocated.
370401
if (!SpillLanes.empty())
@@ -384,14 +415,15 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF,
384415
assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
385416
"not spilling SGPRs to VGPRs");
386417

387-
unsigned &NumSpillLanes =
388-
IsPrologEpilog ? NumPhysicalVGPRSpillLanes : NumVirtualVGPRSpillLanes;
418+
unsigned &NumSpillLanes = SpillToPhysVGPRLane ? NumPhysicalVGPRSpillLanes
419+
: NumVirtualVGPRSpillLanes;
389420

390421
for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
391422
unsigned LaneIndex = (NumSpillLanes % WaveSize);
392423

393-
bool Allocated = IsPrologEpilog
394-
? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex)
424+
bool Allocated = SpillToPhysVGPRLane
425+
? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex,
426+
IsPrologEpilog)
395427
: allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);
396428
if (!Allocated) {
397429
NumSpillLanes -= I;

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -548,7 +548,8 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
548548
bool allocateVirtualVGPRForSGPRSpills(MachineFunction &MF, int FI,
549549
unsigned LaneIndex);
550550
bool allocatePhysicalVGPRForSGPRSpills(MachineFunction &MF, int FI,
551-
unsigned LaneIndex);
551+
unsigned LaneIndex,
552+
bool IsPrologEpilog);
552553

553554
public:
554555
Register getVGPRForAGPRCopy() const {
@@ -588,6 +589,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
588589
}
589590

590591
ArrayRef<Register> getSGPRSpillVGPRs() const { return SpillVGPRs; }
592+
591593
const WWMSpillsMap &getWWMSpills() const { return WWMSpills; }
592594
const ReservedRegSet &getWWMReservedRegs() const { return WWMReservedRegs; }
593595

@@ -711,7 +713,12 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
711713
I->second.IsDead = true;
712714
}
713715

716+
// Shift the physical VGPRs that were allocated in the highest range for CSR
717+
// SGPR spilling down into the lowest available range.
718+
void shiftSpillPhysVGPRsToLowestRange(MachineFunction &MF);
719+
714720
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI,
721+
bool SpillToPhysVGPRLane = false,
715722
bool IsPrologEpilog = false);
716723
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
717724

0 commit comments

Comments
 (0)