Skip to content

Commit f014303

Browse files
committed
[AMDGPU] [NFC]: Organize the code around reserving registers.
First, add code to reserve all required special-purpose registers, followed by code to reserve SGPRs, followed by code to reserve VGPRs/AGPRs. This patch is a prerequisite for fixing an issue related to GFX90A hardware. Reviewed By: arsenm. Differential Revision: https://reviews.llvm.org/D122219
1 parent 818e72d commit f014303

File tree

1 file changed

+63
-63
lines changed

1 file changed

+63
-63
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 63 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,10 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
546546
BitVector Reserved(getNumRegs());
547547
Reserved.set(AMDGPU::MODE);
548548

549+
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
550+
551+
// Reserve special purpose registers.
552+
//
549553
// EXEC_LO and EXEC_HI could be allocated and used as regular register, but
550554
// this seems likely to result in bugs, so I'm marking them as reserved.
551555
reserveRegisterTuples(Reserved, AMDGPU::EXEC);
@@ -596,50 +600,15 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
596600
Reserved.set(AMDGPU::VCC_HI);
597601
}
598602

603+
// Reserve SGPRs.
604+
//
599605
unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
600606
unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
601607
for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
602608
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
603609
reserveRegisterTuples(Reserved, Reg);
604610
}
605611

606-
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
607-
unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
608-
unsigned MaxNumAGPRs = MaxNumVGPRs;
609-
unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
610-
611-
if (ST.hasGFX90AInsts()) {
612-
// In an entry function without calls and AGPRs used it is possible to use
613-
// the whole register budget for VGPRs.
614-
615-
// TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and
616-
// split register file accordingly.
617-
if (MFI->usesAGPRs(MF)) {
618-
MaxNumVGPRs /= 2;
619-
MaxNumAGPRs = MaxNumVGPRs;
620-
} else {
621-
if (MaxNumVGPRs > TotalNumVGPRs) {
622-
MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
623-
MaxNumVGPRs = TotalNumVGPRs;
624-
} else
625-
MaxNumAGPRs = 0;
626-
}
627-
} else if (ST.hasMAIInsts()) {
628-
// In order to guarantee copying between AGPRs, we need a scratch VGPR
629-
// available at all times.
630-
reserveRegisterTuples(Reserved, AMDGPU::VGPR32);
631-
}
632-
633-
for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
634-
unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
635-
reserveRegisterTuples(Reserved, Reg);
636-
}
637-
638-
for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
639-
unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
640-
reserveRegisterTuples(Reserved, Reg);
641-
}
642-
643612
for (auto Reg : AMDGPU::SReg_32RegClass) {
644613
Reserved.set(getSubReg(Reg, AMDGPU::hi16));
645614
Register Low = getSubReg(Reg, AMDGPU::lo16);
@@ -648,22 +617,10 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
648617
Reserved.set(Low);
649618
}
650619

651-
for (auto Reg : AMDGPU::AGPR_32RegClass) {
652-
Reserved.set(getSubReg(Reg, AMDGPU::hi16));
653-
}
654-
655-
// Reserve all the rest AGPRs if there are no instructions to use it.
656-
if (!ST.hasMAIInsts()) {
657-
for (unsigned i = 0; i < MaxNumVGPRs; ++i) {
658-
unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
659-
reserveRegisterTuples(Reserved, Reg);
660-
}
661-
}
662-
663620
Register ScratchRSrcReg = MFI->getScratchRSrcReg();
664621
if (ScratchRSrcReg != AMDGPU::NoRegister) {
665-
// Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
666-
// to spill.
622+
// Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
623+
// need to spill.
667624
// TODO: May need to reserve a VGPR if doing LDS spilling.
668625
reserveRegisterTuples(Reserved, ScratchRSrcReg);
669626
}
@@ -672,7 +629,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
672629
// which is detected after the function is lowered. If we aren't really going
673630
// to need SP, don't bother reserving it.
674631
MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
675-
676632
if (StackPtrReg) {
677633
reserveRegisterTuples(Reserved, StackPtrReg);
678634
assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
@@ -690,20 +646,64 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
690646
assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
691647
}
692648

693-
for (auto Reg : MFI->WWMReservedRegs) {
694-
reserveRegisterTuples(Reserved, Reg.first);
649+
// Reserve VGPRs/AGPRs.
650+
//
651+
unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
652+
unsigned MaxNumAGPRs = MaxNumVGPRs;
653+
unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
654+
655+
// Reserve all the AGPRs if there are no instructions to use them.
656+
if (!ST.hasMAIInsts()) {
657+
for (unsigned i = 0; i < MaxNumAGPRs; ++i) {
658+
unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
659+
reserveRegisterTuples(Reserved, Reg);
660+
}
695661
}
696662

697-
// Reserve VGPRs used for SGPR spilling.
698-
// Note we treat freezeReservedRegs unusually because we run register
699-
// allocation in two phases. It's OK to re-freeze with new registers for the
700-
// second run.
701-
#if 0
702-
for (auto &SpilledFI : MFI->sgpr_spill_vgprs()) {
703-
for (auto &SpilledVGPR : SpilledFI.second)
704-
reserveRegisterTuples(Reserved, SpilledVGPR.VGPR);
663+
for (auto Reg : AMDGPU::AGPR_32RegClass) {
664+
Reserved.set(getSubReg(Reg, AMDGPU::hi16));
665+
}
666+
667+
// On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
668+
// a wave may have up to 512 total vector registers combining together both
669+
// VGPRs and AGPRs. Hence, in an entry function without calls and without
670+
// AGPRs used within it, it is possible to use the whole vector register
671+
// budget for VGPRs.
672+
//
673+
// TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
674+
// register file accordingly.
675+
if (ST.hasGFX90AInsts()) {
676+
if (MFI->usesAGPRs(MF)) {
677+
MaxNumVGPRs /= 2;
678+
MaxNumAGPRs = MaxNumVGPRs;
679+
} else {
680+
if (MaxNumVGPRs > TotalNumVGPRs) {
681+
MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
682+
MaxNumVGPRs = TotalNumVGPRs;
683+
} else
684+
MaxNumAGPRs = 0;
685+
}
686+
}
687+
688+
for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
689+
unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
690+
reserveRegisterTuples(Reserved, Reg);
691+
}
692+
693+
for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
694+
unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
695+
reserveRegisterTuples(Reserved, Reg);
696+
}
697+
698+
// On GFX908, in order to guarantee copying between AGPRs, we need a scratch
699+
// VGPR available at all times.
700+
if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
701+
reserveRegisterTuples(Reserved, AMDGPU::VGPR32);
702+
}
703+
704+
for (auto Reg : MFI->WWMReservedRegs) {
705+
reserveRegisterTuples(Reserved, Reg.first);
705706
}
706-
#endif
707707

708708
// FIXME: Stop using reserved registers for this.
709709
for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())

0 commit comments

Comments
 (0)