Skip to content

Commit 0e17684

Browse files
authored
[AMDGPU] Speed up SIRegisterInfo::getReservedRegs (#79844)
reserveRegisterTuples is slow because it uses MCRegAliasIterator and hence ends up reserving the same aliased registers many times. This patch changes getReservedRegs so that it no longer calls reserveRegisterTuples when reserving SGPRs, VGPRs and AGPRs. Instead it iterates through the base register classes, which should come closer to reserving each register only once. Overall this reduces the time to run check-llvm-codegen-amdgpu in my Release build from 18.4 seconds to 16.9 seconds (all timings +/- 0.2 seconds).
1 parent f7ef73e commit 0e17684

File tree

5 files changed

+41
-16
lines changed

5 files changed

+41
-16
lines changed

llvm/include/llvm/CodeGen/TargetRegisterInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,9 @@ class TargetRegisterClass {
119119
/// registers.
120120
bool isAllocatable() const { return MC->isAllocatable(); }
121121

122+
/// Return true if this register class has a defined BaseClassOrder.
123+
bool isBaseClass() const { return MC->isBaseClass(); }
124+
122125
/// Return true if the specified TargetRegisterClass
123126
/// is a proper sub-class of this TargetRegisterClass.
124127
bool hasSubClass(const TargetRegisterClass *RC) const {

llvm/include/llvm/MC/MCRegisterInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ class MCRegisterClass {
4646
const uint16_t RegSizeInBits;
4747
const int8_t CopyCost;
4848
const bool Allocatable;
49+
const bool BaseClass;
4950

5051
/// getID() - Return the register class ID number.
5152
///
@@ -97,6 +98,9 @@ class MCRegisterClass {
9798
/// isAllocatable - Return true if this register class may be used to create
9899
/// virtual registers.
99100
bool isAllocatable() const { return Allocatable; }
101+
102+
/// Return true if this register class has a defined BaseClassOrder.
103+
bool isBaseClass() const { return BaseClass; }
100104
};
101105

102106
/// MCRegisterDesc - This record contains information about a particular

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -622,9 +622,15 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
622622
//
623623
unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
624624
unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
625-
for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
626-
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
627-
reserveRegisterTuples(Reserved, Reg);
625+
for (const TargetRegisterClass *RC : regclasses()) {
626+
if (RC->isBaseClass() && isSGPRClass(RC)) {
627+
unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
628+
for (MCPhysReg Reg : *RC) {
629+
unsigned Index = getHWRegIndex(Reg);
630+
if (Index + NumRegs > MaxNumSGPRs && Index < TotalNumSGPRs)
631+
Reserved.set(Reg);
632+
}
633+
}
628634
}
629635

630636
Register ScratchRSrcReg = MFI->getScratchRSrcReg();
@@ -693,20 +699,29 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
693699
}
694700
}
695701

696-
for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
697-
unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
698-
reserveRegisterTuples(Reserved, Reg);
702+
for (const TargetRegisterClass *RC : regclasses()) {
703+
if (RC->isBaseClass() && isVGPRClass(RC)) {
704+
unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
705+
for (MCPhysReg Reg : *RC) {
706+
unsigned Index = getHWRegIndex(Reg);
707+
if (Index + NumRegs > MaxNumVGPRs)
708+
Reserved.set(Reg);
709+
}
710+
}
699711
}
700712

701-
if (ST.hasMAIInsts()) {
702-
for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
703-
unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
704-
reserveRegisterTuples(Reserved, Reg);
713+
// Reserve all the AGPRs if there are no instructions to use them.
714+
if (!ST.hasMAIInsts())
715+
MaxNumAGPRs = 0;
716+
for (const TargetRegisterClass *RC : regclasses()) {
717+
if (RC->isBaseClass() && isAGPRClass(RC)) {
718+
unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
719+
for (MCPhysReg Reg : *RC) {
720+
unsigned Index = getHWRegIndex(Reg);
721+
if (Index + NumRegs > MaxNumAGPRs)
722+
Reserved.set(Reg);
723+
}
705724
}
706-
} else {
707-
// Reserve all the AGPRs if there are no instructions to use it.
708-
for (MCRegister Reg : AMDGPU::AGPR_32RegClass)
709-
reserveRegisterTuples(Reserved, Reg);
710725
}
711726

712727
// On GFX908, in order to guarantee copying between AGPRs, we need a scratch

llvm/unittests/CodeGen/MachineInstrTest.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -530,7 +530,8 @@ TEST(MachineInstrTest, SpliceOperands) {
530530
EXPECT_EQ(MI->getOperand(8).getImm(), MachineOperand::CreateImm(4).getImm());
531531

532532
// test tied operands
533-
MCRegisterClass MRC{0, 0, 0, 0, 0, 0, 0, 0, /*Allocatable=*/true};
533+
MCRegisterClass MRC{
534+
0, 0, 0, 0, 0, 0, 0, 0, /*Allocatable=*/true, /*BaseClass=*/true};
534535
TargetRegisterClass RC{&MRC, 0, 0, {}, 0, 0, 0, 0, 0, 0, 0};
535536
// MachineRegisterInfo will be very upset if these registers aren't
536537
// allocatable.

llvm/utils/TableGen/RegisterInfoEmitter.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1069,7 +1069,8 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
10691069
<< RegClassStrings.get(RC.getName()) << ", " << RC.getOrder().size()
10701070
<< ", " << RCBitsSize << ", " << RC.getQualifiedIdName() << ", "
10711071
<< RegSize << ", " << RC.CopyCost << ", "
1072-
<< (RC.Allocatable ? "true" : "false") << " },\n";
1072+
<< (RC.Allocatable ? "true" : "false") << ", "
1073+
<< (RC.getBaseClassOrder() ? "true" : "false") << " },\n";
10731074
}
10741075

10751076
OS << "};\n\n";
@@ -1846,6 +1847,7 @@ void RegisterInfoEmitter::debugDump(raw_ostream &OS) {
18461847
OS << "\tCoveredBySubRegs: " << RC.CoveredBySubRegs << '\n';
18471848
OS << "\tAllocatable: " << RC.Allocatable << '\n';
18481849
OS << "\tAllocationPriority: " << unsigned(RC.AllocationPriority) << '\n';
1850+
OS << "\tBaseClassOrder: " << RC.getBaseClassOrder() << '\n';
18491851
OS << "\tRegs:";
18501852
for (const CodeGenRegister *R : RC.getMembers()) {
18511853
OS << " " << R->getName();

0 commit comments

Comments
 (0)