-
Notifications
You must be signed in to change notification settings - Fork 14.3k
AMDGPU: Add plumbing for private segment size argument #96445
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
The actual size of scratch/private is determined at dispatch time, so add more plumbing to request it. This will be used in a subsequent change.
@llvm/pr-subscribers-backend-amdgpu Author: Nicolai Hähnle (nhaehnle) Changes: The actual size of scratch/private is determined at dispatch time, so add more plumbing to request it. Will be used in subsequent change. Full diff: https://github.com/llvm/llvm-project/pull/96445.diff 8 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index de25f9241a503..f57fc168c1dfc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -115,6 +115,9 @@ AMDGPUFunctionArgInfo::getPreloadedValue(
return std::tuple(
PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
+ case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_SIZE:
+ return {PrivateSegmentSize ? &PrivateSegmentSize : nullptr,
+ &AMDGPU::SGPR_32RegClass, LLT::scalar(32)};
case AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR:
return std::tuple(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
&AMDGPU::SGPR_64RegClass,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index 42b33c50d9f8c..2e02bb4271adc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -114,11 +114,12 @@ struct AMDGPUFunctionArgInfo {
PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
IMPLICIT_BUFFER_PTR = 15,
IMPLICIT_ARG_PTR = 16,
+ PRIVATE_SEGMENT_SIZE = 17,
// VGPRS:
- WORKITEM_ID_X = 17,
- WORKITEM_ID_Y = 18,
- WORKITEM_ID_Z = 19,
+ WORKITEM_ID_X = 18,
+ WORKITEM_ID_Y = 19,
+ WORKITEM_ID_Z = 20,
FIRST_VGPR_VALUE = WORKITEM_ID_X
};
// clang-format on
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index d8e22f4b0d8fa..bb85b03ebb5e9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -461,6 +461,10 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
}
+ if (UserSGPRInfo.hasPrivateSegmentSize()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
+ }
if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
@@ -1397,6 +1401,9 @@ void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
if (UserSGPRInfo.hasFlatScratchInit())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+ if (UserSGPRInfo.hasPrivateSegmentSize())
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
+
if (UserSGPRInfo.hasDispatchPtr())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 0751c8dc8b8bf..a8e26f104f588 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -1104,6 +1104,9 @@ GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,
if (hasFlatScratchInit())
NumUsedUserSGPRs += getNumUserSGPRForField(FlatScratchInitID);
+
+ if (hasPrivateSegmentSize())
+ NumUsedUserSGPRs += getNumUserSGPRForField(PrivateSegmentSizeID);
}
void GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs(unsigned NumSGPRs) {
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 74c8f85f1b031..bb0746c5b5365 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1567,6 +1567,8 @@ class GCNUserSGPRUsageInfo {
bool hasFlatScratchInit() const { return FlatScratchInit; }
+ bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }
+
unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
@@ -1631,6 +1633,8 @@ class GCNUserSGPRUsageInfo {
bool FlatScratchInit = false;
+ bool PrivateSegmentSize = false;
+
unsigned NumKernargPreloadSGPRs = 0;
unsigned NumUsedUserSGPRs = 0;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4f8882ed1cc96..19a928a31fc80 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2468,6 +2468,12 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
CCInfo.AllocateReg(FlatScratchInitReg);
}
+ if (UserSGPRInfo.hasPrivateSegmentSize()) {
+ Register PrivateSegmentSizeReg = Info.addPrivateSegmentSize(TRI);
+ MF.addLiveIn(PrivateSegmentSizeReg, &AMDGPU::SGPR_32RegClass);
+ CCInfo.AllocateReg(PrivateSegmentSizeReg);
+ }
+
// TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
// these from the dispatch pointer.
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 072c5aedc220b..b4496d9dfa993 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -233,6 +233,12 @@ Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
return ArgInfo.FlatScratchInit.getRegister();
}
+Register SIMachineFunctionInfo::addPrivateSegmentSize(const SIRegisterInfo &TRI) { // Records a user SGPR for the private segment size and returns it; TRI is not read in this body.
+ ArgInfo.PrivateSegmentSize = ArgDescriptor::createRegister(getNextUserSGPR()); // Descriptor wraps whatever getNextUserSGPR() yields (presumably the next free user SGPR; confirm).
+ NumUserSGPRs += 1; // The argument accounts for exactly one user SGPR.
+ return ArgInfo.PrivateSegmentSize.getRegister(); // Hand back the register stored in the descriptor just written.
+}
+
Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 9fe02e24c8a15..7af5e7388f841 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -752,6 +752,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
Register addDispatchID(const SIRegisterInfo &TRI);
Register addFlatScratchInit(const SIRegisterInfo &TRI);
+ Register addPrivateSegmentSize(const SIRegisterInfo &TRI);
Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
Register addLDSKernelId();
SmallVectorImpl<MCRegister> *
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a shame we can't just pull this out from somewhere with `s_getreg`.
The actual size of scratch/private is determined at dispatch time, so add more plumbing to request it. Will be used in subsequent change.
The actual size of scratch/private is determined at dispatch time, so add more plumbing to request it. Will be used in subsequent change.