Skip to content

Commit 343be51

Browse files
committed
[AMDGPU] Add utilities to track number of user SGPRs. NFC.
Factor out and unify some common code that calculates and tracks the number of user SGPRs. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D159439
1 parent 9048aa7 commit 343be51

10 files changed

+229
-159
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -393,28 +393,29 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
393393
const MachineFunction &MF) const {
394394
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
395395
uint16_t KernelCodeProperties = 0;
396+
const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI.getUserSGPRInfo();
396397

397-
if (MFI.hasPrivateSegmentBuffer()) {
398+
if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
398399
KernelCodeProperties |=
399400
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
400401
}
401-
if (MFI.hasDispatchPtr()) {
402+
if (UserSGPRInfo.hasDispatchPtr()) {
402403
KernelCodeProperties |=
403404
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
404405
}
405-
if (MFI.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5) {
406+
if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5) {
406407
KernelCodeProperties |=
407408
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
408409
}
409-
if (MFI.hasKernargSegmentPtr()) {
410+
if (UserSGPRInfo.hasKernargSegmentPtr()) {
410411
KernelCodeProperties |=
411412
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
412413
}
413-
if (MFI.hasDispatchID()) {
414+
if (UserSGPRInfo.hasDispatchID()) {
414415
KernelCodeProperties |=
415416
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
416417
}
417-
if (MFI.hasFlatScratchInit()) {
418+
if (UserSGPRInfo.hasFlatScratchInit()) {
418419
KernelCodeProperties |=
419420
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
420421
}
@@ -1165,27 +1166,28 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
11651166
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
11661167
getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
11671168

1168-
if (MFI->hasPrivateSegmentBuffer()) {
1169+
const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
1170+
if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
11691171
Out.code_properties |=
11701172
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
11711173
}
11721174

1173-
if (MFI->hasDispatchPtr())
1175+
if (UserSGPRInfo.hasDispatchPtr())
11741176
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
11751177

1176-
if (MFI->hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5)
1178+
if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5)
11771179
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
11781180

1179-
if (MFI->hasKernargSegmentPtr())
1181+
if (UserSGPRInfo.hasKernargSegmentPtr())
11801182
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
11811183

1182-
if (MFI->hasDispatchID())
1184+
if (UserSGPRInfo.hasDispatchID())
11831185
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
11841186

1185-
if (MFI->hasFlatScratchInit())
1187+
if (UserSGPRInfo.hasFlatScratchInit())
11861188
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
11871189

1188-
if (MFI->hasDispatchPtr())
1190+
if (UserSGPRInfo.hasDispatchPtr())
11891191
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
11901192

11911193
if (STM.isXNACKEnabled())

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -455,27 +455,28 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
455455
const SIRegisterInfo &TRI,
456456
SIMachineFunctionInfo &Info) {
457457
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
458-
if (Info.hasPrivateSegmentBuffer()) {
458+
const GCNUserSGPRUsageInfo &UserSGPRInfo = Info.getUserSGPRInfo();
459+
if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
459460
Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
460461
MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
461462
CCInfo.AllocateReg(PrivateSegmentBufferReg);
462463
}
463464

464-
if (Info.hasDispatchPtr()) {
465+
if (UserSGPRInfo.hasDispatchPtr()) {
465466
Register DispatchPtrReg = Info.addDispatchPtr(TRI);
466467
MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
467468
CCInfo.AllocateReg(DispatchPtrReg);
468469
}
469470

470471
const Module *M = MF.getFunction().getParent();
471-
if (Info.hasQueuePtr() &&
472+
if (UserSGPRInfo.hasQueuePtr() &&
472473
AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
473474
Register QueuePtrReg = Info.addQueuePtr(TRI);
474475
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
475476
CCInfo.AllocateReg(QueuePtrReg);
476477
}
477478

478-
if (Info.hasKernargSegmentPtr()) {
479+
if (UserSGPRInfo.hasKernargSegmentPtr()) {
479480
MachineRegisterInfo &MRI = MF.getRegInfo();
480481
Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
481482
const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
@@ -486,13 +487,13 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
486487
CCInfo.AllocateReg(InputPtrReg);
487488
}
488489

489-
if (Info.hasDispatchID()) {
490+
if (UserSGPRInfo.hasDispatchID()) {
490491
Register DispatchIDReg = Info.addDispatchID(TRI);
491492
MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
492493
CCInfo.AllocateReg(DispatchIDReg);
493494
}
494495

495-
if (Info.hasFlatScratchInit()) {
496+
if (UserSGPRInfo.hasFlatScratchInit()) {
496497
Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
497498
MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
498499
CCInfo.AllocateReg(FlatScratchInitReg);
@@ -597,15 +598,16 @@ bool AMDGPUCallLowering::lowerFormalArguments(
597598

598599
SmallVector<CCValAssign, 16> ArgLocs;
599600
CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
601+
const GCNUserSGPRUsageInfo &UserSGPRInfo = Info->getUserSGPRInfo();
600602

601-
if (Info->hasImplicitBufferPtr()) {
603+
if (UserSGPRInfo.hasImplicitBufferPtr()) {
602604
Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
603605
MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
604606
CCInfo.AllocateReg(ImplicitBufferPtrReg);
605607
}
606608

607609
// FIXME: This probably isn't defined for mesa
608-
if (Info->hasFlatScratchInit() && !Subtarget.isAmdPalOS()) {
610+
if (UserSGPRInfo.hasFlatScratchInit() && !Subtarget.isAmdPalOS()) {
609611
Register FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
610612
MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
611613
CCInfo.AllocateReg(FlatScratchInitReg);

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1103,7 +1103,7 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
11031103
Offset += 8; // Skipped.
11041104
}
11051105

1106-
if (MFI.hasQueuePtr())
1106+
if (MFI.getUserSGPRInfo().hasQueuePtr())
11071107
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
11081108
}
11091109

llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
185185
//
186186
// If we only have implicit uses of flat_scr on flat instructions, it is not
187187
// really needed.
188-
if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() &&
188+
if (Info.UsesFlatScratch && !MFI->getUserSGPRInfo().hasFlatScratchInit() &&
189189
(!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) &&
190190
!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) &&
191191
!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) {

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 88 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "AMDGPULegalizerInfo.h"
1818
#include "AMDGPURegisterBankInfo.h"
1919
#include "AMDGPUTargetMachine.h"
20+
#include "GCNSubtarget.h"
2021
#include "R600Subtarget.h"
2122
#include "SIMachineFunctionInfo.h"
2223
#include "Utils/AMDGPUBaseInfo.h"
@@ -692,7 +693,7 @@ GCNSubtarget::getBaseReservedNumSGPRs(const bool HasFlatScratch) const {
692693

693694
unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
694695
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
695-
return getBaseReservedNumSGPRs(MFI.hasFlatScratchInit());
696+
return getBaseReservedNumSGPRs(MFI.getUserSGPRInfo().hasFlatScratchInit());
696697
}
697698

698699
unsigned GCNSubtarget::getReservedNumSGPRs(const Function &F) const {
@@ -770,25 +771,27 @@ unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
770771
getReservedNumSGPRs(MF));
771772
}
772773

773-
static unsigned getMaxNumPreloadedSGPRs() {
774+
static constexpr unsigned getMaxNumPreloadedSGPRs() {
775+
using USI = GCNUserSGPRUsageInfo;
774776
// Max number of user SGPRs
775-
unsigned MaxUserSGPRs = 4 + // private segment buffer
776-
2 + // Dispatch ptr
777-
2 + // queue ptr
778-
2 + // kernel segment ptr
779-
2 + // dispatch ID
780-
2 + // flat scratch init
781-
2; // Implicit buffer ptr
777+
const unsigned MaxUserSGPRs =
778+
USI::getNumUserSGPRForField(USI::PrivateSegmentBufferID) +
779+
USI::getNumUserSGPRForField(USI::DispatchPtrID) +
780+
USI::getNumUserSGPRForField(USI::QueuePtrID) +
781+
USI::getNumUserSGPRForField(USI::KernargSegmentPtrID) +
782+
USI::getNumUserSGPRForField(USI::DispatchIdID) +
783+
USI::getNumUserSGPRForField(USI::FlatScratchInitID) +
784+
USI::getNumUserSGPRForField(USI::ImplicitBufferPtrID);
782785

783786
// Max number of system SGPRs
784-
unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX
785-
1 + // WorkGroupIDY
786-
1 + // WorkGroupIDZ
787-
1 + // WorkGroupInfo
788-
1; // private segment wave byte offset
787+
const unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX
788+
1 + // WorkGroupIDY
789+
1 + // WorkGroupIDZ
790+
1 + // WorkGroupInfo
791+
1; // private segment wave byte offset
789792

790793
// Max number of synthetic SGPRs
791-
unsigned SyntheticSGPRs = 1; // LDSKernelId
794+
const unsigned SyntheticSGPRs = 1; // LDSKernelId
792795

793796
return MaxUserSGPRs + MaxSystemSGPRs + SyntheticSGPRs;
794797
}
@@ -1018,3 +1021,73 @@ const AMDGPUSubtarget &AMDGPUSubtarget::get(const TargetMachine &TM, const Funct
10181021
else
10191022
return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<R600Subtarget>(F));
10201023
}
1024+
1025+
GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo(const Function &F,
1026+
const GCNSubtarget &ST) {
1027+
const CallingConv::ID CC = F.getCallingConv();
1028+
const bool IsKernel =
1029+
CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL;
1030+
// FIXME: Should have analysis or something rather than attribute to detect
1031+
// calls.
1032+
const bool HasCalls = F.hasFnAttribute("amdgpu-calls");
1033+
// FIXME: This attribute is a hack, we just need an analysis on the function
1034+
// to look for allocas.
1035+
const bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
1036+
1037+
if (IsKernel && (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0))
1038+
KernargSegmentPtr = true;
1039+
1040+
bool IsAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
1041+
if (IsAmdHsaOrMesa && !ST.enableFlatScratch())
1042+
PrivateSegmentBuffer = true;
1043+
else if (ST.isMesaGfxShader(F))
1044+
ImplicitBufferPtr = true;
1045+
1046+
if (!AMDGPU::isGraphics(CC)) {
1047+
if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
1048+
DispatchPtr = true;
1049+
1050+
// FIXME: Can this always be disabled with < COv5?
1051+
if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
1052+
QueuePtr = true;
1053+
1054+
if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
1055+
DispatchID = true;
1056+
}
1057+
1058+
// TODO: This could be refined a lot. The attribute is a poor way of
1059+
// detecting calls or stack objects that may require it before argument
1060+
// lowering.
1061+
if (ST.hasFlatAddressSpace() && AMDGPU::isEntryFunctionCC(CC) &&
1062+
(IsAmdHsaOrMesa || ST.enableFlatScratch()) &&
1063+
(HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
1064+
!ST.flatScratchIsArchitected()) {
1065+
FlatScratchInit = true;
1066+
}
1067+
}
1068+
1069+
unsigned GCNUserSGPRUsageInfo::getNumUsedUserSGPRs() const {
1070+
unsigned NumUserSGPRs = 0;
1071+
if (hasImplicitBufferPtr())
1072+
NumUserSGPRs += getNumUserSGPRForField(ImplicitBufferPtrID);
1073+
1074+
if (hasPrivateSegmentBuffer())
1075+
NumUserSGPRs += getNumUserSGPRForField(PrivateSegmentBufferID);
1076+
1077+
if (hasDispatchPtr())
1078+
NumUserSGPRs += getNumUserSGPRForField(DispatchPtrID);
1079+
1080+
if (hasQueuePtr())
1081+
NumUserSGPRs += getNumUserSGPRForField(QueuePtrID);
1082+
1083+
if (hasKernargSegmentPtr())
1084+
NumUserSGPRs += getNumUserSGPRForField(KernargSegmentPtrID);
1085+
1086+
if (hasDispatchID())
1087+
NumUserSGPRs += getNumUserSGPRForField(DispatchIdID);
1088+
1089+
if (hasFlatScratchInit())
1090+
NumUserSGPRs += getNumUserSGPRForField(FlatScratchInitID);
1091+
1092+
return NumUserSGPRs;
1093+
}

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "SIInstrInfo.h"
2323
#include "Utils/AMDGPUBaseInfo.h"
2424
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
25+
#include "llvm/Support/ErrorHandling.h"
2526

2627
#define GET_SUBTARGETINFO_HEADER
2728
#include "AMDGPUGenSubtargetInfo.inc"
@@ -1378,6 +1379,79 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
13781379
}
13791380
};
13801381

1382+
class GCNUserSGPRUsageInfo {
1383+
public:
1384+
unsigned getNumUsedUserSGPRs() const;
1385+
1386+
bool hasImplicitBufferPtr() const { return ImplicitBufferPtr; }
1387+
1388+
bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; }
1389+
1390+
bool hasDispatchPtr() const { return DispatchPtr; }
1391+
1392+
bool hasQueuePtr() const { return QueuePtr; }
1393+
1394+
bool hasKernargSegmentPtr() const { return KernargSegmentPtr; }
1395+
1396+
bool hasDispatchID() const { return DispatchID; }
1397+
1398+
bool hasFlatScratchInit() const { return FlatScratchInit; }
1399+
1400+
enum UserSGPRID : unsigned {
1401+
ImplicitBufferPtrID = 0,
1402+
PrivateSegmentBufferID = 1,
1403+
DispatchPtrID = 2,
1404+
QueuePtrID = 3,
1405+
KernargSegmentPtrID = 4,
1406+
DispatchIdID = 5,
1407+
FlatScratchInitID = 6,
1408+
PrivateSegmentSizeID = 7
1409+
};
1410+
1411+
// Returns the size in number of SGPRs for preload user SGPR field.
1412+
static constexpr unsigned getNumUserSGPRForField(UserSGPRID ID) {
1413+
switch (ID) {
1414+
case ImplicitBufferPtrID:
1415+
return 2;
1416+
case PrivateSegmentBufferID:
1417+
return 4;
1418+
case DispatchPtrID:
1419+
return 2;
1420+
case QueuePtrID:
1421+
return 2;
1422+
case KernargSegmentPtrID:
1423+
return 2;
1424+
case DispatchIdID:
1425+
return 2;
1426+
case FlatScratchInitID:
1427+
return 2;
1428+
case PrivateSegmentSizeID:
1429+
return 1;
1430+
}
1431+
llvm_unreachable("Unknown UserSGPRID.");
1432+
}
1433+
1434+
GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);
1435+
1436+
private:
1437+
// Private memory buffer
1438+
// Compute directly in sgpr[0:1]
1439+
// Other shaders indirect 64-bits at sgpr[0:1]
1440+
bool ImplicitBufferPtr = false;
1441+
1442+
bool PrivateSegmentBuffer = false;
1443+
1444+
bool DispatchPtr = false;
1445+
1446+
bool QueuePtr = false;
1447+
1448+
bool KernargSegmentPtr = false;
1449+
1450+
bool DispatchID = false;
1451+
1452+
bool FlatScratchInit = false;
1453+
};
1454+
13811455
} // end namespace llvm
13821456

13831457
#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H

0 commit comments

Comments
 (0)