@@ -379,8 +379,7 @@ class PrologEpilogSGPRSpillBuilder {
379
379
} // namespace llvm
380
380
381
381
// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
382
- // and return the FlatScratchInit Register used
383
- Register SIFrameLowering::emitEntryFunctionFlatScratchInit (
382
+ void SIFrameLowering::emitEntryFunctionFlatScratchInit (
384
383
MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
385
384
const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
386
385
const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
@@ -400,7 +399,6 @@ Register SIFrameLowering::emitEntryFunctionFlatScratchInit(
400
399
401
400
Register FlatScrInitLo;
402
401
Register FlatScrInitHi;
403
- Register FlatScratchInitReg;
404
402
405
403
if (ST.isAmdPalOS ()) {
406
404
// Extract the scratch offset from the descriptor in the GIT
@@ -410,6 +408,7 @@ Register SIFrameLowering::emitEntryFunctionFlatScratchInit(
410
408
411
409
// Find unused reg to load flat scratch init into
412
410
MachineRegisterInfo &MRI = MF.getRegInfo ();
411
+ Register FlatScrInit = AMDGPU::NoRegister;
413
412
ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64 (MF);
414
413
unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs () + 1 ) / 2 ;
415
414
AllSGPR64s = AllSGPR64s.slice (
@@ -418,28 +417,16 @@ Register SIFrameLowering::emitEntryFunctionFlatScratchInit(
418
417
for (MCPhysReg Reg : AllSGPR64s) {
419
418
if (LiveUnits.available (Reg) && !MRI.isReserved (Reg) &&
420
419
MRI.isAllocatable (Reg) && !TRI->isSubRegisterEq (Reg, GITPtrLoReg)) {
421
- FlatScratchInitReg = Reg;
420
+ FlatScrInit = Reg;
422
421
break ;
423
422
}
424
423
}
424
+ assert (FlatScrInit && " Failed to find free register for scratch init" );
425
425
426
- } else {
427
- FlatScratchInitReg =
428
- MFI->getPreloadedReg (AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
429
-
430
- MachineRegisterInfo &MRI = MF.getRegInfo ();
431
- MRI.addLiveIn (FlatScratchInitReg);
432
- MBB.addLiveIn (FlatScratchInitReg);
433
- }
434
-
435
- assert (FlatScratchInitReg && " Failed to find free register for scratch init" );
436
-
437
- FlatScrInitLo = TRI->getSubReg (FlatScratchInitReg, AMDGPU::sub0);
438
- FlatScrInitHi = TRI->getSubReg (FlatScratchInitReg, AMDGPU::sub1);
439
-
440
- if (ST.isAmdPalOS ()) {
426
+ FlatScrInitLo = TRI->getSubReg (FlatScrInit, AMDGPU::sub0);
427
+ FlatScrInitHi = TRI->getSubReg (FlatScrInit, AMDGPU::sub1);
441
428
442
- buildGitPtr (MBB, I, DL, TII, FlatScratchInitReg );
429
+ buildGitPtr (MBB, I, DL, TII, FlatScrInit );
443
430
444
431
// We now have the GIT ptr - now get the scratch descriptor from the entry
445
432
// at offset 0 (or offset 16 for a compute shader).
@@ -454,18 +441,29 @@ Register SIFrameLowering::emitEntryFunctionFlatScratchInit(
454
441
MF.getFunction ().getCallingConv () == CallingConv::AMDGPU_CS ? 16 : 0 ;
455
442
const GCNSubtarget &Subtarget = MF.getSubtarget <GCNSubtarget>();
456
443
unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits (Subtarget, Offset);
457
- BuildMI (MBB, I, DL, LoadDwordX2, FlatScratchInitReg )
458
- .addReg (FlatScratchInitReg )
444
+ BuildMI (MBB, I, DL, LoadDwordX2, FlatScrInit )
445
+ .addReg (FlatScrInit )
459
446
.addImm (EncodedOffset) // offset
460
447
.addImm (0 ) // cpol
461
448
.addMemOperand (MMO);
462
449
463
450
// Mask the offset in [47:0] of the descriptor
464
451
const MCInstrDesc &SAndB32 = TII->get (AMDGPU::S_AND_B32);
465
452
auto And = BuildMI (MBB, I, DL, SAndB32, FlatScrInitHi)
466
- .addReg (FlatScrInitHi)
467
- .addImm (0xffff );
453
+ .addReg (FlatScrInitHi)
454
+ .addImm (0xffff );
468
455
And->getOperand (3 ).setIsDead (); // Mark SCC as dead.
456
+ } else {
457
+ Register FlatScratchInitReg =
458
+ MFI->getPreloadedReg (AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
459
+ assert (FlatScratchInitReg);
460
+
461
+ MachineRegisterInfo &MRI = MF.getRegInfo ();
462
+ MRI.addLiveIn (FlatScratchInitReg);
463
+ MBB.addLiveIn (FlatScratchInitReg);
464
+
465
+ FlatScrInitLo = TRI->getSubReg (FlatScratchInitReg, AMDGPU::sub0);
466
+ FlatScrInitHi = TRI->getSubReg (FlatScratchInitReg, AMDGPU::sub1);
469
467
}
470
468
471
469
// Do a 64-bit pointer add.
@@ -488,21 +486,20 @@ Register SIFrameLowering::emitEntryFunctionFlatScratchInit(
488
486
addReg (FlatScrInitHi).
489
487
addImm (int16_t (AMDGPU::Hwreg::ID_FLAT_SCR_HI |
490
488
(31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
491
- return FlatScratchInitReg ;
489
+ return ;
492
490
}
493
491
494
- assert (ST.getGeneration () == AMDGPUSubtarget::GFX9);
495
-
492
+ // For GFX9.
496
493
BuildMI (MBB, I, DL, TII->get (AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
497
- .addReg (FlatScrInitLo)
498
- .addReg (ScratchWaveOffsetReg);
494
+ .addReg (FlatScrInitLo)
495
+ .addReg (ScratchWaveOffsetReg);
499
496
auto Addc = BuildMI (MBB, I, DL, TII->get (AMDGPU::S_ADDC_U32),
500
497
AMDGPU::FLAT_SCR_HI)
501
498
.addReg (FlatScrInitHi)
502
499
.addImm (0 );
503
500
Addc->getOperand (3 ).setIsDead (); // Mark SCC as dead.
504
501
505
- return AMDGPU::FLAT_SCR ;
502
+ return ;
506
503
}
507
504
508
505
assert (ST.getGeneration () < AMDGPUSubtarget::GFX9);
@@ -523,7 +520,6 @@ Register SIFrameLowering::emitEntryFunctionFlatScratchInit(
523
520
.addReg (FlatScrInitLo, RegState::Kill)
524
521
.addImm (8 );
525
522
LShr->getOperand (3 ).setIsDead (); // Mark SCC as dead.
526
- return AMDGPU::FLAT_SCR;
527
523
}
528
524
529
525
// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
@@ -615,15 +611,11 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
615
611
const SIInstrInfo *TII = ST.getInstrInfo ();
616
612
const SIRegisterInfo *TRI = &TII->getRegisterInfo ();
617
613
MachineRegisterInfo &MRI = MF.getRegInfo ();
614
+ const Function &F = MF.getFunction ();
618
615
MachineFrameInfo &FrameInfo = MF.getFrameInfo ();
619
616
620
617
assert (MFI->isEntryFunction ());
621
618
622
- bool NeedsFlatScratchInit =
623
- MFI->getUserSGPRInfo ().hasFlatScratchInit () &&
624
- (MRI.isPhysRegUsed (AMDGPU::FLAT_SCR) || FrameInfo.hasCalls () ||
625
- (!allStackObjectsAreDead (FrameInfo) && ST.enableFlatScratch ()));
626
-
627
619
Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg (
628
620
AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
629
621
@@ -649,7 +641,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
649
641
// Now that we have fixed the reserved SRSRC we need to locate the
650
642
// (potentially) preloaded SRSRC.
651
643
Register PreloadedScratchRsrcReg;
652
- if (ST.isAmdHsaOrMesa (MF. getFunction ()) && !NeedsFlatScratchInit ) {
644
+ if (ST.isAmdHsaOrMesa (F) ) {
653
645
PreloadedScratchRsrcReg =
654
646
MFI->getPreloadedReg (AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
655
647
if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
@@ -705,30 +697,33 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
705
697
BuildMI (MBB, I, DL, TII->get (AMDGPU::S_MOV_B32), FPReg).addImm (0 );
706
698
}
707
699
700
+ bool NeedsFlatScratchInit =
701
+ MFI->getUserSGPRInfo ().hasFlatScratchInit () &&
702
+ (MRI.isPhysRegUsed (AMDGPU::FLAT_SCR) || FrameInfo.hasCalls () ||
703
+ (!allStackObjectsAreDead (FrameInfo) && ST.enableFlatScratch ()));
704
+
708
705
if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
709
706
PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected ()) {
710
707
MRI.addLiveIn (PreloadedScratchWaveOffsetReg);
711
708
MBB.addLiveIn (PreloadedScratchWaveOffsetReg);
712
709
}
713
710
714
- Register FlatScratchInit;
715
711
if (NeedsFlatScratchInit) {
716
- FlatScratchInit =
717
- emitEntryFunctionFlatScratchInit (MF, MBB, I, DL, ScratchWaveOffsetReg);
712
+ emitEntryFunctionFlatScratchInit (MF, MBB, I, DL, ScratchWaveOffsetReg);
718
713
}
719
714
720
715
if (ScratchRsrcReg) {
721
- emitEntryFunctionScratchRsrcRegSetup (
722
- MF, MBB, I, DL, FlatScratchInit, ScratchRsrcReg ,
723
- PreloadedScratchRsrcReg , ScratchWaveOffsetReg);
716
+ emitEntryFunctionScratchRsrcRegSetup (MF, MBB, I, DL,
717
+ PreloadedScratchRsrcReg ,
718
+ ScratchRsrcReg , ScratchWaveOffsetReg);
724
719
}
725
720
}
726
721
727
722
// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
728
723
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup (
729
724
MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
730
- const DebugLoc &DL, Register FlatScratchInit, Register ScratchRsrcReg ,
731
- Register PreloadedScratchRsrcReg , Register ScratchWaveOffsetReg) const {
725
+ const DebugLoc &DL, Register PreloadedScratchRsrcReg ,
726
+ Register ScratchRsrcReg , Register ScratchWaveOffsetReg) const {
732
727
733
728
const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
734
729
const SIInstrInfo *TII = ST.getInstrInfo ();
@@ -776,8 +771,7 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
776
771
.addImm (21 )
777
772
.addReg (Rsrc03);
778
773
}
779
- } else if (ST.isMesaGfxShader (Fn) ||
780
- (!FlatScratchInit.isValid () && !PreloadedScratchRsrcReg)) {
774
+ } else if (ST.isMesaGfxShader (Fn) || !PreloadedScratchRsrcReg) {
781
775
assert (!ST.isAmdHsaOrMesa (Fn));
782
776
const MCInstrDesc &SMovB32 = TII->get (AMDGPU::S_MOV_B32);
783
777
@@ -836,26 +830,6 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
836
830
.addImm (Rsrc23 >> 32 )
837
831
.addReg (ScratchRsrcReg, RegState::ImplicitDefine);
838
832
} else if (ST.isAmdHsaOrMesa (Fn)) {
839
-
840
- if (FlatScratchInit) {
841
- const MCInstrDesc &SMovB32 = TII->get (AMDGPU::S_MOV_B32);
842
- Register Lo_32 = TRI->getSubReg (ScratchRsrcReg, AMDGPU::sub2);
843
- Register Hi_32 = TRI->getSubReg (ScratchRsrcReg, AMDGPU::sub3);
844
- uint64_t Rsrc23 = TII->getScratchRsrcWords23 ();
845
- I = BuildMI (MBB, I, DL, TII->get (AMDGPU::COPY),
846
- TRI->getSubReg (ScratchRsrcReg, AMDGPU::sub0_sub1))
847
- .addReg (FlatScratchInit)
848
- .addReg (ScratchRsrcReg, RegState::ImplicitDefine);
849
- BuildMI (MBB, I, DL, SMovB32, Lo_32)
850
- .addImm (Rsrc23 & 0xffffffff )
851
- .addReg (ScratchRsrcReg, RegState::ImplicitDefine);
852
-
853
- BuildMI (MBB, I, DL, SMovB32, Hi_32)
854
- .addImm (Rsrc23 >> 32 )
855
- .addReg (ScratchRsrcReg, RegState::ImplicitDefine);
856
- return ;
857
- }
858
-
859
833
assert (PreloadedScratchRsrcReg);
860
834
861
835
if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
0 commit comments