Skip to content

[AMDGPU][GlobalISel] wrap the load-splitting code in RegBank selection with condition #98966

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 24, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 18 additions & 14 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1059,6 +1059,7 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(
Register DstReg = MI.getOperand(0).getReg();
const LLT LoadTy = MRI.getType(DstReg);
unsigned LoadSize = LoadTy.getSizeInBits();
MachineMemOperand *MMO = *MI.memoperands_begin();
const unsigned MaxNonSmrdLoadSize = 128;

const RegisterBank *DstBank =
Expand All @@ -1069,7 +1070,6 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(
if (LoadSize != 32 && (LoadSize != 96 || Subtarget.hasScalarDwordx3Loads()))
return false;

MachineMemOperand *MMO = *MI.memoperands_begin();
const unsigned MemSize = 8 * MMO->getSize().getValue();
// Scalar loads of size 8 or 16 bit with proper alignment may be widened to
// 32 bit. Check to see if we need to widen the memory access, 8 or 16 bit
Expand Down Expand Up @@ -1142,25 +1142,29 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(
if (SrcRegs.empty())
SrcRegs.push_back(MI.getOperand(1).getReg());

assert(LoadSize % MaxNonSmrdLoadSize == 0);

// RegBankSelect only emits scalar types, so we need to reset the pointer
// operand to a pointer type.
Register BasePtrReg = SrcRegs[0];
LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
MRI.setType(BasePtrReg, PtrTy);

unsigned NumSplitParts = LoadTy.getSizeInBits() / MaxNonSmrdLoadSize;
const LLT LoadSplitTy = LoadTy.divide(NumSplitParts);
ApplyRegBankMapping O(B, *this, MRI, &AMDGPU::VGPRRegBank);
LegalizerHelper Helper(B.getMF(), O, B);

if (LoadTy.isVector()) {
if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized)
return false;
} else {
if (Helper.narrowScalar(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized)
return false;
// The following loads were not split enough during legalization because it
// was not yet clear whether they would be SMEM loads or VMEM loads.
if (AMDGPU::isExtendedGlobalAddrSpace(MMO->getAddrSpace()) ||
MMO->getAddrSpace() == AMDGPUAS::BUFFER_RESOURCE) {
assert(LoadSize % MaxNonSmrdLoadSize == 0);
unsigned NumSplitParts = LoadTy.getSizeInBits() / MaxNonSmrdLoadSize;
const LLT LoadSplitTy = LoadTy.divide(NumSplitParts);
ApplyRegBankMapping O(B, *this, MRI, &AMDGPU::VGPRRegBank);
LegalizerHelper Helper(B.getMF(), O, B);
if (LoadTy.isVector()) {
if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) !=
LegalizerHelper::Legalized)
return false;
} else {
if (Helper.narrowScalar(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized)
return false;
}
}

MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
Expand Down
Loading