Skip to content

Commit 7e9b49f

Browse files
authored
AMDGPU: Add plumbing for private segment size argument (#96445)
The actual size of the scratch/private segment is determined at dispatch time, so add the plumbing needed to request it. This will be used in a subsequent change.
1 parent d6c7410 commit 7e9b49f

8 files changed

+34
-3
lines changed

llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,9 @@ AMDGPUFunctionArgInfo::getPreloadedValue(
115115
return std::tuple(
116116
PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
117117
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
118+
case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_SIZE:
119+
return {PrivateSegmentSize ? &PrivateSegmentSize : nullptr,
120+
&AMDGPU::SGPR_32RegClass, LLT::scalar(32)};
118121
case AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR:
119122
return std::tuple(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
120123
&AMDGPU::SGPR_64RegClass,

llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,11 +114,12 @@ struct AMDGPUFunctionArgInfo {
114114
PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
115115
IMPLICIT_BUFFER_PTR = 15,
116116
IMPLICIT_ARG_PTR = 16,
117+
PRIVATE_SEGMENT_SIZE = 17,
117118

118119
// VGPRS:
119-
WORKITEM_ID_X = 17,
120-
WORKITEM_ID_Y = 18,
121-
WORKITEM_ID_Z = 19,
120+
WORKITEM_ID_X = 18,
121+
WORKITEM_ID_Y = 19,
122+
WORKITEM_ID_Z = 20,
122123
FIRST_VGPR_VALUE = WORKITEM_ID_X
123124
};
124125
// clang-format on

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,10 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
461461
KernelCodeProperties |=
462462
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
463463
}
464+
if (UserSGPRInfo.hasPrivateSegmentSize()) {
465+
KernelCodeProperties |=
466+
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
467+
}
464468
if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
465469
KernelCodeProperties |=
466470
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
@@ -1397,6 +1401,9 @@ void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
13971401
if (UserSGPRInfo.hasFlatScratchInit())
13981402
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
13991403

1404+
if (UserSGPRInfo.hasPrivateSegmentSize())
1405+
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
1406+
14001407
if (UserSGPRInfo.hasDispatchPtr())
14011408
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
14021409

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1104,6 +1104,9 @@ GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,
11041104

11051105
if (hasFlatScratchInit())
11061106
NumUsedUserSGPRs += getNumUserSGPRForField(FlatScratchInitID);
1107+
1108+
if (hasPrivateSegmentSize())
1109+
NumUsedUserSGPRs += getNumUserSGPRForField(PrivateSegmentSizeID);
11071110
}
11081111

11091112
void GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs(unsigned NumSGPRs) {

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1567,6 +1567,8 @@ class GCNUserSGPRUsageInfo {
15671567

15681568
/// Returns the value of the FlatScratchInit flag.
bool hasFlatScratchInit() const { return FlatScratchInit; }
15691569

1570+
/// Returns the value of the PrivateSegmentSize flag. Callers use this to
/// decide whether to account for, allocate, and advertise a user SGPR for the
/// private segment size.
bool hasPrivateSegmentSize() const { return PrivateSegmentSize; }
1571+
15701572
/// Returns the current value of NumKernargPreloadSGPRs.
unsigned getNumKernargPreloadSGPRs() const { return NumKernargPreloadSGPRs; }
15711573

15721574
/// Returns NumUsedUserSGPRs, the running total that the constructor increases
/// by getNumUserSGPRForField(...) for each enabled user SGPR field.
unsigned getNumUsedUserSGPRs() const { return NumUsedUserSGPRs; }
@@ -1631,6 +1633,8 @@ class GCNUserSGPRUsageInfo {
16311633

16321634
bool FlatScratchInit = false;
16331635

1636+
bool PrivateSegmentSize = false;
1637+
16341638
unsigned NumKernargPreloadSGPRs = 0;
16351639

16361640
unsigned NumUsedUserSGPRs = 0;

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2468,6 +2468,12 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
24682468
CCInfo.AllocateReg(FlatScratchInitReg);
24692469
}
24702470

2471+
if (UserSGPRInfo.hasPrivateSegmentSize()) {
2472+
Register PrivateSegmentSizeReg = Info.addPrivateSegmentSize(TRI);
2473+
MF.addLiveIn(PrivateSegmentSizeReg, &AMDGPU::SGPR_32RegClass);
2474+
CCInfo.AllocateReg(PrivateSegmentSizeReg);
2475+
}
2476+
24712477
// TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
24722478
// these from the dispatch pointer.
24732479
}

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,12 @@ Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
232232
return ArgInfo.FlatScratchInit.getRegister();
233233
}
234234

235+
/// Reserves a user SGPR for the private segment size argument.
///
/// Stores the register obtained from getNextUserSGPR() in
/// ArgInfo.PrivateSegmentSize, increases NumUserSGPRs by one, and returns the
/// stored register.
///
/// \param TRI Not referenced in this body.
/// \returns The register recorded in ArgInfo.PrivateSegmentSize.
Register SIMachineFunctionInfo::addPrivateSegmentSize(const SIRegisterInfo &TRI) {
236+
ArgInfo.PrivateSegmentSize = ArgDescriptor::createRegister(getNextUserSGPR());
237+
// Exactly one user SGPR is consumed by this argument.
NumUserSGPRs += 1;
238+
return ArgInfo.PrivateSegmentSize.getRegister();
239+
}
240+
235241
Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
236242
ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
237243
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -752,6 +752,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
752752
Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
753753
Register addDispatchID(const SIRegisterInfo &TRI);
754754
Register addFlatScratchInit(const SIRegisterInfo &TRI);
755+
Register addPrivateSegmentSize(const SIRegisterInfo &TRI);
755756
Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
756757
Register addLDSKernelId();
757758
SmallVectorImpl<MCRegister> *

0 commit comments

Comments
 (0)