@@ -546,6 +546,10 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   Reserved.set(AMDGPU::MODE);
 
+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+  // Reserve special purpose registers.
+  //
   // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
   // this seems likely to result in bugs, so I'm marking them as reserved.
   reserveRegisterTuples(Reserved, AMDGPU::EXEC);
@@ -596,50 +600,15 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     Reserved.set(AMDGPU::VCC_HI);
   }
 
+  // Reserve SGPRs.
+  //
   unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
   unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
   for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
     unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
     reserveRegisterTuples(Reserved, Reg);
   }
 
-  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
-  unsigned MaxNumAGPRs = MaxNumVGPRs;
-  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
-
-  if (ST.hasGFX90AInsts()) {
-    // In an entry function without calls and AGPRs used it is possible to use
-    // the whole register budget for VGPRs.
-
-    // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and
-    // split register file accordingly.
-    if (MFI->usesAGPRs(MF)) {
-      MaxNumVGPRs /= 2;
-      MaxNumAGPRs = MaxNumVGPRs;
-    } else {
-      if (MaxNumVGPRs > TotalNumVGPRs) {
-        MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
-        MaxNumVGPRs = TotalNumVGPRs;
-      } else
-        MaxNumAGPRs = 0;
-    }
-  } else if (ST.hasMAIInsts()) {
-    // In order to guarantee copying between AGPRs, we need a scratch VGPR
-    // available at all times.
-    reserveRegisterTuples(Reserved, AMDGPU::VGPR32);
-  }
-
-  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
-    unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
-    reserveRegisterTuples(Reserved, Reg);
-  }
-
-  for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
-    unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
-    reserveRegisterTuples(Reserved, Reg);
-  }
-
   for (auto Reg : AMDGPU::SReg_32RegClass) {
     Reserved.set(getSubReg(Reg, AMDGPU::hi16));
     Register Low = getSubReg(Reg, AMDGPU::lo16);
@@ -648,22 +617,10 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
       Reserved.set(Low);
   }
 
-  for (auto Reg : AMDGPU::AGPR_32RegClass) {
-    Reserved.set(getSubReg(Reg, AMDGPU::hi16));
-  }
-
-  // Reserve all the rest AGPRs if there are no instructions to use it.
-  if (!ST.hasMAIInsts()) {
-    for (unsigned i = 0; i < MaxNumVGPRs; ++i) {
-      unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
-      reserveRegisterTuples(Reserved, Reg);
-    }
-  }
-
   Register ScratchRSrcReg = MFI->getScratchRSrcReg();
   if (ScratchRSrcReg != AMDGPU::NoRegister) {
-    // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
-    // to spill.
+    // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
+    // need to spill.
     // TODO: May need to reserve a VGPR if doing LDS spilling.
     reserveRegisterTuples(Reserved, ScratchRSrcReg);
   }
@@ -672,7 +629,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   // which is detected after the function is lowered. If we aren't really going
   // to need SP, don't bother reserving it.
   MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
-
   if (StackPtrReg) {
     reserveRegisterTuples(Reserved, StackPtrReg);
     assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
@@ -690,20 +646,64 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
   }
 
-  for (auto Reg : MFI->WWMReservedRegs) {
-    reserveRegisterTuples(Reserved, Reg.first);
+  // Reserve VGPRs/AGPRs.
+  //
+  unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
+  unsigned MaxNumAGPRs = MaxNumVGPRs;
+  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
+
+  // Reserve all the AGPRs if there are no instructions to use it.
+  if (!ST.hasMAIInsts()) {
+    for (unsigned i = 0; i < MaxNumAGPRs; ++i) {
+      unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
+      reserveRegisterTuples(Reserved, Reg);
+    }
   }
 
-  // Reserve VGPRs used for SGPR spilling.
-  // Note we treat freezeReservedRegs unusually because we run register
-  // allocation in two phases. It's OK to re-freeze with new registers for the
-  // second run.
-#if 0
-  for (auto &SpilledFI : MFI->sgpr_spill_vgprs()) {
-    for (auto &SpilledVGPR : SpilledFI.second)
-      reserveRegisterTuples(Reserved, SpilledVGPR.VGPR);
+  for (auto Reg : AMDGPU::AGPR_32RegClass) {
+    Reserved.set(getSubReg(Reg, AMDGPU::hi16));
+  }
+
+  // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
+  // a wave may have up to 512 total vector registers combining together both
+  // VGPRs and AGPRs. Hence, in an entry function without calls and without
+  // AGPRs used within it, it is possible to use the whole vector register
+  // budget for VGPRs.
+  //
+  // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
+  // register file accordingly.
+  if (ST.hasGFX90AInsts()) {
+    if (MFI->usesAGPRs(MF)) {
+      MaxNumVGPRs /= 2;
+      MaxNumAGPRs = MaxNumVGPRs;
+    } else {
+      if (MaxNumVGPRs > TotalNumVGPRs) {
+        MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
+        MaxNumVGPRs = TotalNumVGPRs;
+      } else
+        MaxNumAGPRs = 0;
+    }
+  }
+
+  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
+    unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
+    reserveRegisterTuples(Reserved, Reg);
+  }
+
+  for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
+    unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
+    reserveRegisterTuples(Reserved, Reg);
+  }
+
+  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
+  // VGPR available at all times.
+  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
+    reserveRegisterTuples(Reserved, AMDGPU::VGPR32);
+  }
+
+  for (auto Reg : MFI->WWMReservedRegs) {
+    reserveRegisterTuples(Reserved, Reg.first);
   }
-#endif
 
   // FIXME: Stop using reserved registers for this.
   for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
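Aside, not part of the patch: the GFX90A branch above splits a single combined vector-register budget between VGPRs and AGPRs. The standalone sketch below mirrors that arithmetic with made-up inputs (MaxNumVGPRs = 512, TotalNumVGPRs = 256, UsesAGPRs = false) so the split is easy to trace; it is an illustration of the logic only, not code taken from the commit.

// Illustrative sketch of the VGPR/AGPR budget split from the GFX90A branch.
// All values below are hypothetical example inputs, not values from the patch.
#include <cstdio>

int main() {
  unsigned MaxNumVGPRs = 512;         // assumed per-wave vector register budget
  unsigned MaxNumAGPRs = MaxNumVGPRs;
  const unsigned TotalNumVGPRs = 256; // assumed size of the VGPR_32 class
  const bool UsesAGPRs = false;       // pretend the function uses no AGPRs

  if (UsesAGPRs) {
    // Function uses AGPRs: split the budget evenly between VGPRs and AGPRs.
    MaxNumVGPRs /= 2;
    MaxNumAGPRs = MaxNumVGPRs;
  } else if (MaxNumVGPRs > TotalNumVGPRs) {
    // No AGPRs used: VGPRs get the whole architectural file, and whatever
    // budget remains beyond it is left to AGPRs.
    MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
    MaxNumVGPRs = TotalNumVGPRs;
  } else {
    // Budget fits entirely in VGPRs; no AGPRs are made allocatable.
    MaxNumAGPRs = 0;
  }

  // In the patch, registers at index >= MaxNumVGPRs (resp. MaxNumAGPRs) are
  // then reserved in the corresponding loops.
  std::printf("allocatable VGPRs: %u, allocatable AGPRs: %u\n",
              MaxNumVGPRs, MaxNumAGPRs);
  return 0;
}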