@@ -379,7 +379,8 @@ class PrologEpilogSGPRSpillBuilder {
 } // namespace llvm
 
 // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
-void SIFrameLowering::emitEntryFunctionFlatScratchInit(
+// and return the FlatScratchInit Register used
+Register SIFrameLowering::emitEntryFunctionFlatScratchInit(
     MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -399,6 +400,7 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
 
   Register FlatScrInitLo;
   Register FlatScrInitHi;
+  Register FlatScratchInitReg;
 
   if (ST.isAmdPalOS()) {
     // Extract the scratch offset from the descriptor in the GIT
@@ -408,7 +410,6 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
 
     // Find unused reg to load flat scratch init into
     MachineRegisterInfo &MRI = MF.getRegInfo();
-    Register FlatScrInit = AMDGPU::NoRegister;
     ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
     unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
     AllSGPR64s = AllSGPR64s.slice(
@@ -417,16 +418,28 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
     for (MCPhysReg Reg : AllSGPR64s) {
       if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
           MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
-        FlatScrInit = Reg;
+        FlatScratchInitReg = Reg;
         break;
       }
     }
-    assert(FlatScrInit && "Failed to find free register for scratch init");
 
-    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
-    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
+  } else {
+    FlatScratchInitReg =
+        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
+
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+    MRI.addLiveIn(FlatScratchInitReg);
+    MBB.addLiveIn(FlatScratchInitReg);
+  }
+
+  assert(FlatScratchInitReg && "Failed to find free register for scratch init");
+
+  FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
+  FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
+
+  if (ST.isAmdPalOS()) {
 
-    buildGitPtr(MBB, I, DL, TII, FlatScrInit);
+    buildGitPtr(MBB, I, DL, TII, FlatScratchInitReg);
 
     // We now have the GIT ptr - now get the scratch descriptor from the entry
     // at offset 0 (or offset 16 for a compute shader).
@@ -441,29 +454,18 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
         MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
     const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
     unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
-    BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
-        .addReg(FlatScrInit)
+    BuildMI(MBB, I, DL, LoadDwordX2, FlatScratchInitReg)
+        .addReg(FlatScratchInitReg)
         .addImm(EncodedOffset) // offset
         .addImm(0) // cpol
         .addMemOperand(MMO);
 
     // Mask the offset in [47:0] of the descriptor
     const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
     auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
-        .addReg(FlatScrInitHi)
-        .addImm(0xffff);
+                   .addReg(FlatScrInitHi)
+                   .addImm(0xffff);
     And->getOperand(3).setIsDead(); // Mark SCC as dead.
-  } else {
-    Register FlatScratchInitReg =
-        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
-    assert(FlatScratchInitReg);
-
-    MachineRegisterInfo &MRI = MF.getRegInfo();
-    MRI.addLiveIn(FlatScratchInitReg);
-    MBB.addLiveIn(FlatScratchInitReg);
-
-    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
-    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
   }
 
   // Do a 64-bit pointer add.
@@ -486,20 +488,21 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
           addReg(FlatScrInitHi).
           addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
                          (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
-      return;
+      return FlatScratchInitReg;
     }
 
-    // For GFX9.
+    assert(ST.getGeneration() == AMDGPUSubtarget::GFX9);
+
     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
-      .addReg(FlatScrInitLo)
-      .addReg(ScratchWaveOffsetReg);
+        .addReg(FlatScrInitLo)
+        .addReg(ScratchWaveOffsetReg);
     auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
                         AMDGPU::FLAT_SCR_HI)
                     .addReg(FlatScrInitHi)
                     .addImm(0);
     Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
 
-    return;
+    return AMDGPU::FLAT_SCR;
   }
 
   assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
@@ -520,6 +523,7 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
                   .addReg(FlatScrInitLo, RegState::Kill)
                   .addImm(8);
   LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
+  return AMDGPU::FLAT_SCR;
 }
 
 // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
@@ -611,11 +615,15 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  const Function &F = MF.getFunction();
   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
 
   assert(MFI->isEntryFunction());
 
+  bool NeedsFlatScratchInit =
+      MFI->getUserSGPRInfo().hasFlatScratchInit() &&
+      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
+       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
+
   Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
       AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
 
@@ -641,7 +649,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
   // Now that we have fixed the reserved SRSRC we need to locate the
   // (potentially) preloaded SRSRC.
   Register PreloadedScratchRsrcReg;
-  if (ST.isAmdHsaOrMesa(F)) {
+  if (ST.isAmdHsaOrMesa(MF.getFunction()) && !NeedsFlatScratchInit) {
     PreloadedScratchRsrcReg =
         MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
     if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
@@ -697,33 +705,30 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
   }
 
-  bool NeedsFlatScratchInit =
-      MFI->getUserSGPRInfo().hasFlatScratchInit() &&
-      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
-       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
-
   if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
       PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
     MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
     MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
   }
 
+  Register FlatScratchInit;
   if (NeedsFlatScratchInit) {
-    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
+    FlatScratchInit =
+        emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
   }
 
   if (ScratchRsrcReg) {
-    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
-                                         PreloadedScratchRsrcReg,
-                                         ScratchRsrcReg, ScratchWaveOffsetReg);
+    emitEntryFunctionScratchRsrcRegSetup(
+        MF, MBB, I, DL, FlatScratchInit, ScratchRsrcReg,
+        PreloadedScratchRsrcReg, ScratchWaveOffsetReg);
   }
 }
 
 // Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
 void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
     MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
-    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
+    const DebugLoc &DL, Register FlatScratchInit, Register ScratchRsrcReg,
+    Register PreloadedScratchRsrcReg, Register ScratchWaveOffsetReg) const {
 
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
@@ -771,7 +776,8 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
           .addImm(21)
           .addReg(Rsrc03);
     }
-  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
+  } else if (ST.isMesaGfxShader(Fn) ||
+             (!FlatScratchInit.isValid() && !PreloadedScratchRsrcReg)) {
     assert(!ST.isAmdHsaOrMesa(Fn));
     const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
 
@@ -830,6 +836,26 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
         .addImm(Rsrc23 >> 32)
         .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
   } else if (ST.isAmdHsaOrMesa(Fn)) {
+
+    if (FlatScratchInit) {
+      const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
+      Register Lo_32 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
+      Register Hi_32 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
+      uint64_t Rsrc23 = TII->getScratchRsrcWords23();
+      I = BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY),
+                  TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1))
+              .addReg(FlatScratchInit)
+              .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+      BuildMI(MBB, I, DL, SMovB32, Lo_32)
+          .addImm(Rsrc23 & 0xffffffff)
+          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+      BuildMI(MBB, I, DL, SMovB32, Hi_32)
+          .addImm(Rsrc23 >> 32)
+          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+      return;
+    }
+
     assert(PreloadedScratchRsrcReg);
 
     if (ScratchRsrcReg != PreloadedScratchRsrcReg) {