@@ -30,6 +30,11 @@ using namespace llvm;
 
 using MBBVector = SmallVector<MachineBasicBlock *, 4>;
 
+static cl::opt<bool>
+    LegacySGPRSpillLowering("amdgpu-legacy-sgpr-spill-lowering",
+                            cl::desc("Enable the legacy SGPR spill lowering"),
+                            cl::ReallyHidden, cl::init(false));
+
 namespace {
 
 class SILowerSGPRSpills : public MachineFunctionPass {
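[Note: cl::ReallyHidden keeps the new option out of both -help and
-help-hidden output, but it can still be passed explicitly on the command
line. A hypothetical invocation (the file names are placeholders, not part
of this patch):

    llc -mtriple=amdgcn -amdgpu-legacy-sgpr-spill-lowering input.ll -o out.s]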
@@ -54,6 +59,12 @@ class SILowerSGPRSpills : public MachineFunctionPass {
                             SmallVectorImpl<int> &CalleeSavedFIs);
   void extendWWMVirtRegLiveness(MachineFunction &MF, LiveIntervals *LIS);
 
+  void legacySpillLowering(MachineFunction &MF, BitVector &SpillFIs,
+                           bool &NewReservedRegs);
+  void lowerSpills(MachineFunction &MF, SmallVectorImpl<int> &CalleeSavedFIs,
+                   BitVector &SpillFIs, bool &NewReservedRegs,
+                   bool &SpilledToVirtVGPRLanes);
+
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -298,6 +309,93 @@ void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF,
   }
 }
 
+// The fallback legacy spill method.
+void SILowerSGPRSpills::legacySpillLowering(MachineFunction &MF,
+                                            BitVector &SpillFIs,
+                                            bool &NewReservedRegs) {
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+      if (!TII->isSGPRSpill(MI))
+        continue;
+
+      int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
+      assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+
+      if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI, true)) {
+        NewReservedRegs = true;
+        bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
+            MI, FI, nullptr, Indexes, LIS, true);
+        if (!Spilled)
+          llvm_unreachable(
+              "failed to spill SGPR to physical VGPR lane when allocated");
+      }
+      SpillFIs.set(FI);
+    }
+  }
+}
+
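[Note: llvm::make_early_inc_range (from llvm/ADT/STLExtras.h) advances the
underlying iterator before yielding each element, so the spill pseudo being
visited can be erased during lowering without invalidating the loop. The
pattern, with a hypothetical erase condition for illustration:

    // The iterator has already moved past MI when the body runs, so
    // eraseFromParent() cannot invalidate it.
    for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
      if (MI.isDebugInstr()) // hypothetical predicate, not from this patch
        MI.eraseFromParent();

llvm_unreachable above marks an invariant rather than a recoverable error:
it aborts with the message in assertion-enabled builds and lowers to
undefined behavior in release builds.]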
+// The improved method that spills SGPRs into lanes of virtual VGPRs. The
+// register allocator will then efficiently assign them physical registers.
+void SILowerSGPRSpills::lowerSpills(MachineFunction &MF,
+                                    SmallVectorImpl<int> &CalleeSavedFIs,
+                                    BitVector &SpillFIs, bool &NewReservedRegs,
+                                    bool &SpilledToVirtVGPRLanes) {
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+      if (!TII->isSGPRSpill(MI))
+        continue;
+
+      int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
+      assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+
+      bool IsCalleeSaveSGPRSpill = llvm::is_contained(CalleeSavedFIs, FI);
+      if (IsCalleeSaveSGPRSpill) {
+        // Spill callee-saved SGPRs into physical VGPR lanes.
+
+        // TODO: This is to ensure the CFIs are static for efficient frame
+        // unwinding in the debugger. Spilling them into virtual VGPR lanes
+        // involves regalloc allocating the physical VGPRs, which might cause
+        // intermediate spills/splits of such live ranges for successful
+        // allocation. This would result in broken CFI encoding unless
+        // regalloc-aware CFI generation that inserts new CFIs along with the
+        // intermediate spills is implemented. No such support currently
+        // exists in the LLVM compiler.
+        if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI, true)) {
+          NewReservedRegs = true;
+          bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
+              MI, FI, nullptr, Indexes, LIS, true);
+          if (!Spilled)
+            llvm_unreachable(
+                "failed to spill SGPR to physical VGPR lane when allocated");
+        }
+      } else {
+        if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
+          bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
+              MI, FI, nullptr, Indexes, LIS);
+          if (!Spilled)
+            llvm_unreachable(
+                "failed to spill SGPR to virtual VGPR lane when allocated");
+          SpillFIs.set(FI);
+          SpilledToVirtVGPRLanes = true;
+        }
+      }
+    }
+  }
+
+  if (SpilledToVirtVGPRLanes) {
+    extendWWMVirtRegLiveness(MF, LIS);
+    if (LIS) {
+      // Compute the LiveInterval for the newly created virtual registers.
+      for (auto Reg : FuncInfo->getSGPRSpillVGPRs())
+        LIS->createAndComputeVirtRegInterval(Reg);
+    }
+  }
+}
+
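[Note: lowerSpills uses llvm::is_contained(CalleeSavedFIs, FI) where the
removed block further down spelled out the std::find(...) != end() idiom;
the two are equivalent. A minimal self-contained sketch:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"

    // is_contained(Range, X) is shorthand for
    // std::find(Range.begin(), Range.end(), X) != Range.end().
    static bool isCSRFrameIndex(const llvm::SmallVectorImpl<int> &FIs, int FI) {
      return llvm::is_contained(FIs, FI);
    }]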
 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   TII = ST.getInstrInfo();
@@ -330,8 +428,8 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
 
   // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
   // handled as SpilledToReg in regular PrologEpilogInserter.
-  const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
-                                  (HasCSRs || FuncInfo->hasSpilledSGPRs());
+  const bool HasSGPRSpillToVGPR =
+      TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs());
   if (HasSGPRSpillToVGPR) {
     // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
     // are spilled to VGPRs, in which case we can eliminate the stack usage.
@@ -342,58 +440,13 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
     // To track the spill frame indices handled in this pass.
     BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
 
-    for (MachineBasicBlock &MBB : MF) {
-      for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
-        if (!TII->isSGPRSpill(MI))
-          continue;
-
-        int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
-        assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
-
-        bool IsCalleeSaveSGPRSpill =
-            std::find(CalleeSavedFIs.begin(), CalleeSavedFIs.end(), FI) !=
-            CalleeSavedFIs.end();
-        if (IsCalleeSaveSGPRSpill) {
-          // Spill callee-saved SGPRs into physical VGPR lanes.
-
-          // TODO: This is to ensure the CFIs are static for efficient frame
-          // unwinding in the debugger. Spilling them into virtual VGPR lanes
-          // involve regalloc to allocate the physical VGPRs and that might
-          // cause intermediate spill/split of such liveranges for successful
-          // allocation. This would result in broken CFI encoding unless the
-          // regalloc aware CFI generation to insert new CFIs along with the
-          // intermediate spills is implemented. There is no such support
-          // currently exist in the LLVM compiler.
-          if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI, true)) {
-            NewReservedRegs = true;
-            bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
-                MI, FI, nullptr, Indexes, LIS, true);
-            if (!Spilled)
-              llvm_unreachable(
-                  "failed to spill SGPR to physical VGPR lane when allocated");
-          }
-        } else {
-          if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
-            bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
-                MI, FI, nullptr, Indexes, LIS);
-            if (!Spilled)
-              llvm_unreachable(
-                  "failed to spill SGPR to virtual VGPR lane when allocated");
-            SpillFIs.set(FI);
-            SpilledToVirtVGPRLanes = true;
-          }
-        }
-      }
-    }
-
-    if (SpilledToVirtVGPRLanes) {
-      extendWWMVirtRegLiveness(MF, LIS);
-      if (LIS) {
-        // Compute the LiveInterval for the newly created virtual registers.
-        for (auto Reg : FuncInfo->getSGPRSpillVGPRs())
-          LIS->createAndComputeVirtRegInterval(Reg);
-      }
-    }
+    // The LegacySGPRSpillLowering switch should be used only as a fallback
+    // when the new spill lowering causes any runtime issues.
+    if (LegacySGPRSpillLowering)
+      legacySpillLowering(MF, SpillFIs, NewReservedRegs);
+    else
+      lowerSpills(MF, CalleeSavedFIs, SpillFIs, NewReservedRegs,
+                  SpilledToVirtVGPRLanes);
 
     for (MachineBasicBlock &MBB : MF) {
       // FIXME: The dead frame indices are replaced with a null register from
@@ -426,9 +479,8 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
     MadeChange = true;
   }
 
-  if (SpilledToVirtVGPRLanes) {
-    const TargetRegisterClass *RC =
-        ST.isWave32() ? &AMDGPU::SGPR_32RegClass : &AMDGPU::SGPR_64RegClass;
+  if (!LegacySGPRSpillLowering && SpilledToVirtVGPRLanes) {
+    const TargetRegisterClass *RC = TRI->getWaveMaskRegClass();
     // Shift back the reserved SGPR for EXEC copy into the lowest range.
     // This SGPR is reserved to handle the whole-wave spill/copy operations
     // that might get inserted during vgpr regalloc.
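[Note: the removed ternary shows the wave-size distinction that
TRI->getWaveMaskRegClass() now encapsulates: the EXEC mask is 32 bits wide
in wave32 mode and 64 bits wide in wave64 mode, so the matching scalar
register class is chosen by the subtarget's wave size. A sketch of the idea,
assuming the helper behaves like the code it replaces (the actual
SIRegisterInfo implementation may return a more constrained class):

    // Pick the SGPR class whose width matches the wavefront's EXEC mask.
    const TargetRegisterClass *RC =
        ST.isWave32() ? &AMDGPU::SGPR_32RegClass : &AMDGPU::SGPR_64RegClass;]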