Skip to content

[AMDGPU] Speed up SIRegisterInfo::getReservedRegs #79844

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/include/llvm/CodeGen/TargetRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,9 @@ class TargetRegisterClass {
/// registers.
bool isAllocatable() const { return MC->isAllocatable(); }

/// Return true if this register class has a defined BaseClassOrder.
/// This is a thin accessor: it forwards the query to the MC-layer object
/// `MC` and performs no computation of its own.
bool isBaseClass() const { return MC->isBaseClass(); }

/// Return true if the specified TargetRegisterClass
/// is a proper sub-class of this TargetRegisterClass.
bool hasSubClass(const TargetRegisterClass *RC) const {
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/MC/MCRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class MCRegisterClass {
const uint16_t RegSizeInBits;
const int8_t CopyCost;
const bool Allocatable;
const bool BaseClass;

/// getID() - Return the register class ID number.
///
Expand Down Expand Up @@ -97,6 +98,9 @@ class MCRegisterClass {
/// isAllocatable - Return true if this register class may be used to create
/// virtual registers.
/// The result is the value of the const `Allocatable` member of this object.
bool isAllocatable() const { return Allocatable; }

/// Return true if this register class has a defined BaseClassOrder.
/// The result is the value of the const `BaseClass` member of this object,
/// so it cannot change after the object has been initialized.
bool isBaseClass() const { return BaseClass; }
};

/// MCRegisterDesc - This record contains information about a particular
Expand Down
43 changes: 29 additions & 14 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -622,9 +622,15 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
//
unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
reserveRegisterTuples(Reserved, Reg);
for (const TargetRegisterClass *RC : regclasses()) {
if (RC->isBaseClass() && isSGPRClass(RC)) {
unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
for (MCPhysReg Reg : *RC) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can get away with `drop_front`. Plus, I still think we should move reserved regs to be stored as reserved reg units.

unsigned Index = getHWRegIndex(Reg);
if (Index + NumRegs > MaxNumSGPRs && Index < TotalNumSGPRs)
Reserved.set(Reg);
}
}
}

Register ScratchRSrcReg = MFI->getScratchRSrcReg();
Expand Down Expand Up @@ -693,20 +699,29 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}

for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
reserveRegisterTuples(Reserved, Reg);
for (const TargetRegisterClass *RC : regclasses()) {
if (RC->isBaseClass() && isVGPRClass(RC)) {
unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
for (MCPhysReg Reg : *RC) {
unsigned Index = getHWRegIndex(Reg);
if (Index + NumRegs > MaxNumVGPRs)
Reserved.set(Reg);
}
}
}

if (ST.hasMAIInsts()) {
for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
reserveRegisterTuples(Reserved, Reg);
// Reserve all the AGPRs if there are no instructions to use them.
if (!ST.hasMAIInsts())
MaxNumAGPRs = 0;
for (const TargetRegisterClass *RC : regclasses()) {
if (RC->isBaseClass() && isAGPRClass(RC)) {
unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
for (MCPhysReg Reg : *RC) {
unsigned Index = getHWRegIndex(Reg);
if (Index + NumRegs > MaxNumAGPRs)
Reserved.set(Reg);
}
}
} else {
// Reserve all the AGPRs if there are no instructions to use them.
for (MCRegister Reg : AMDGPU::AGPR_32RegClass)
reserveRegisterTuples(Reserved, Reg);
}

// On GFX908, in order to guarantee copying between AGPRs, we need a scratch
Expand Down
3 changes: 2 additions & 1 deletion llvm/unittests/CodeGen/MachineInstrTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,8 @@ TEST(MachineInstrTest, SpliceOperands) {
EXPECT_EQ(MI->getOperand(8).getImm(), MachineOperand::CreateImm(4).getImm());

// test tied operands
MCRegisterClass MRC{0, 0, 0, 0, 0, 0, 0, 0, /*Allocatable=*/true};
MCRegisterClass MRC{
0, 0, 0, 0, 0, 0, 0, 0, /*Allocatable=*/true, /*BaseClass=*/true};
TargetRegisterClass RC{&MRC, 0, 0, {}, 0, 0, 0, 0, 0, 0, 0};
// MachineRegisterInfo will be very upset if these registers aren't
// allocatable.
Expand Down
4 changes: 3 additions & 1 deletion llvm/utils/TableGen/RegisterInfoEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1069,7 +1069,8 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
<< RegClassStrings.get(RC.getName()) << ", " << RC.getOrder().size()
<< ", " << RCBitsSize << ", " << RC.getQualifiedIdName() << ", "
<< RegSize << ", " << RC.CopyCost << ", "
<< (RC.Allocatable ? "true" : "false") << " },\n";
<< (RC.Allocatable ? "true" : "false") << ", "
<< (RC.getBaseClassOrder() ? "true" : "false") << " },\n";
}

OS << "};\n\n";
Expand Down Expand Up @@ -1846,6 +1847,7 @@ void RegisterInfoEmitter::debugDump(raw_ostream &OS) {
OS << "\tCoveredBySubRegs: " << RC.CoveredBySubRegs << '\n';
OS << "\tAllocatable: " << RC.Allocatable << '\n';
OS << "\tAllocationPriority: " << unsigned(RC.AllocationPriority) << '\n';
OS << "\tBaseClassOrder: " << RC.getBaseClassOrder() << '\n';
OS << "\tRegs:";
for (const CodeGenRegister *R : RC.getMembers()) {
OS << " " << R->getName();
Expand Down