Skip to content

AMDGPU: Add plumbing for private segment size argument #96445

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ AMDGPUFunctionArgInfo::getPreloadedValue(
return std::tuple(
PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_SIZE:
return {PrivateSegmentSize ? &PrivateSegmentSize : nullptr,
&AMDGPU::SGPR_32RegClass, LLT::scalar(32)};
case AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR:
return std::tuple(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
&AMDGPU::SGPR_64RegClass,
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,12 @@ struct AMDGPUFunctionArgInfo {
PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
IMPLICIT_BUFFER_PTR = 15,
IMPLICIT_ARG_PTR = 16,
PRIVATE_SEGMENT_SIZE = 17,

// VGPRS:
WORKITEM_ID_X = 17,
WORKITEM_ID_Y = 18,
WORKITEM_ID_Z = 19,
WORKITEM_ID_X = 18,
WORKITEM_ID_Y = 19,
WORKITEM_ID_Z = 20,
FIRST_VGPR_VALUE = WORKITEM_ID_X
};
// clang-format on
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,10 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
}
if (UserSGPRInfo.hasPrivateSegmentSize()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
}
if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
Expand Down Expand Up @@ -1397,6 +1401,9 @@ void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
if (UserSGPRInfo.hasFlatScratchInit())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;

if (UserSGPRInfo.hasPrivateSegmentSize())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;

if (UserSGPRInfo.hasDispatchPtr())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;

Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1104,6 +1104,9 @@ GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,

if (hasFlatScratchInit())
NumUsedUserSGPRs += getNumUserSGPRForField(FlatScratchInitID);

if (hasPrivateSegmentSize())
NumUsedUserSGPRs += getNumUserSGPRForField(PrivateSegmentSizeID);
}

void GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs(unsigned NumSGPRs) {
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -1567,6 +1567,8 @@ class GCNUserSGPRUsageInfo {

bool hasFlatScratchInit() const { return FlatScratchInit; }

bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }

unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }

unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
Expand Down Expand Up @@ -1631,6 +1633,8 @@ class GCNUserSGPRUsageInfo {

bool FlatScratchInit = false;

bool PrivateSegmentSize = false;

unsigned NumKernargPreloadSGPRs = 0;

unsigned NumUsedUserSGPRs = 0;
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2468,6 +2468,12 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
CCInfo.AllocateReg(FlatScratchInitReg);
}

if (UserSGPRInfo.hasPrivateSegmentSize()) {
Register PrivateSegmentSizeReg = Info.addPrivateSegmentSize(TRI);
MF.addLiveIn(PrivateSegmentSizeReg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(PrivateSegmentSizeReg);
}

// TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
// these from the dispatch pointer.
}
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,12 @@ Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
return ArgInfo.FlatScratchInit.getRegister();
}

Register SIMachineFunctionInfo::addPrivateSegmentSize(const SIRegisterInfo &TRI) {
ArgInfo.PrivateSegmentSize = ArgDescriptor::createRegister(getNextUserSGPR());
NumUserSGPRs += 1;
return ArgInfo.PrivateSegmentSize.getRegister();
}

Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
Register addDispatchID(const SIRegisterInfo &TRI);
Register addFlatScratchInit(const SIRegisterInfo &TRI);
Register addPrivateSegmentSize(const SIRegisterInfo &TRI);
Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
Register addLDSKernelId();
SmallVectorImpl<MCRegister> *
Expand Down
Loading