Skip to content

Commit 29dde91

Browse files
committed
[AMDGPU] Update target helpers & GCNSchedStrategy for dynamic VGPRs llvm#130047
1 parent c30cc5d commit 29dde91

File tree

6 files changed

+86
-0
lines changed

6 files changed

+86
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1251,6 +1251,12 @@ def FeatureDynamicVGPR : SubtargetFeature <"dynamic-vgpr",
12511251
"Enable dynamic VGPR mode"
12521252
>;
12531253

1254+
// Subtarget feature "dynamic-vgpr-block-size-32": sets the subtarget field
// DynamicVGPRBlockSize32 to "true", requesting a block size of 32 (rather
// than the default 16) for dynamic VGPR allocation.
def FeatureDynamicVGPRBlockSize32 : SubtargetFeature<"dynamic-vgpr-block-size-32",
1255+
"DynamicVGPRBlockSize32",
1256+
"true",
1257+
"Use a block size of 32 for dynamic VGPR allocation (default is 16)"
1258+
>;
1259+
12541260
// Enable the use of SCRATCH_STORE/LOAD_BLOCK instructions for saving and
12551261
// restoring the callee-saved registers.
12561262
def FeatureUseBlockVGPROpsForCSR : SubtargetFeature<"block-vgpr-csr",

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1448,6 +1448,16 @@ bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
14481448
if (WavesAfter < DAG.MinOccupancy)
14491449
return true;
14501450

1451+
// For dynamic VGPR mode, we don't want to waste any VGPR blocks.
1452+
if (ST.isDynamicVGPREnabled()) {
1453+
unsigned BlocksBefore = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
1454+
&ST, PressureBefore.getVGPRNum(false));
1455+
unsigned BlocksAfter = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
1456+
&ST, PressureAfter.getVGPRNum(false));
1457+
if (BlocksAfter > BlocksBefore)
1458+
return true;
1459+
}
1460+
14511461
return false;
14521462
}
14531463

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
191191
unsigned MaxHardClauseLength = 0;
192192
bool SupportsSRAMECC = false;
193193
bool DynamicVGPR = false;
194+
bool DynamicVGPRBlockSize32 = false;
194195

195196
// This should not be used directly. 'TargetID' tracks the dynamic settings
196197
// for SRAMECC.

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1153,6 +1153,9 @@ unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
11531153
if (STI->getFeatureBits().test(FeatureGFX90AInsts))
11541154
return 8;
11551155

1156+
if (STI->getFeatureBits().test(FeatureDynamicVGPR))
1157+
return STI->getFeatureBits().test(FeatureDynamicVGPRBlockSize32) ? 32 : 16;
1158+
11561159
bool IsWave32 = EnableWavefrontSize32 ?
11571160
*EnableWavefrontSize32 :
11581161
STI->getFeatureBits().test(FeatureWavefrontSize32);
@@ -1194,6 +1197,9 @@ unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }
11941197
/// Returns the number of VGPRs addressable on the subtarget described by
/// \p STI:
///  - 512 when FeatureGFX90AInsts is set;
///  - otherwise, 8 * getVGPRAllocGranule(STI) when FeatureDynamicVGPR is set;
///  - otherwise, getAddressableNumArchVGPRs(STI).
/// The GFX90A check takes precedence over the dynamic VGPR check.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
11951198
if (STI->getFeatureBits().test(FeatureGFX90AInsts))
11961199
return 512;
1200+
if (STI->getFeatureBits().test(FeatureDynamicVGPR))
1201+
// On GFX12 we can allocate at most 8 blocks of VGPRs.
// The block size is whatever getVGPRAllocGranule reports for this subtarget.
1202+
return 8 * getVGPRAllocGranule(STI);
11971203
return getAddressableNumArchVGPRs(STI);
11981204
}
11991205

llvm/unittests/Target/AMDGPU/AMDGPUUnitTests.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,24 @@ static void testGPRLimits(const char *RegName, bool TestW32W64,
152152
EXPECT_TRUE(ErrStr.empty()) << ErrStr;
153153
}
154154

155+
/// Runs \p test against a GCNSubtarget built for \p CPUName with the
/// "+dynamic-vgpr" feature prepended to the feature string \p FS.
///
/// Aborts the calling test (ASSERT) if the target machine cannot be created
/// or if the resulting subtarget does not have FeatureDynamicVGPR set. The
/// table text accumulated by testAndRecord is streamed into the failure
/// message of the final expectation.
/// NOTE(review): testAndRecord and PrintCpuRegLimits are defined outside this
/// hunk; their exact semantics should be confirmed there.
static void testDynamicVGPRLimits(StringRef CPUName, StringRef FS,
156+
TestFuncTy test) {
157+
auto TM = createAMDGPUTargetMachine("amdgcn-amd-", CPUName,
158+
"+dynamic-vgpr," + FS.str());
159+
ASSERT_TRUE(TM) << "No target machine";
160+
161+
GCNSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
162+
std::string(TM->getTargetFeatureString()), *TM);
163+
// Guard against the feature string being silently ignored.
ASSERT_TRUE(ST.getFeatureBits().test(AMDGPU::FeatureDynamicVGPR));
164+
165+
std::stringstream Table;
166+
bool Success = testAndRecord(Table, ST, test);
167+
// The expectation also fails when PrintCpuRegLimits is set, so the table is
// emitted through the failure message in that case.
EXPECT_TRUE(Success && !PrintCpuRegLimits)
168+
<< CPUName << " dynamic VGPR " << FS
169+
<< ":\nOcc MinVGPR MaxVGPR\n"
170+
<< Table.str() << '\n';
171+
}
172+
155173
TEST(AMDGPU, TestVGPRLimitsPerOccupancy) {
156174
auto test = [](std::stringstream &OS, unsigned Occ, const GCNSubtarget &ST) {
157175
unsigned MaxVGPRNum = ST.getAddressableNumVGPRs();
@@ -163,4 +181,48 @@ TEST(AMDGPU, TestVGPRLimitsPerOccupancy) {
163181
};
164182

165183
testGPRLimits("VGPR", true, test);
184+
185+
testDynamicVGPRLimits("gfx1200", "+wavefrontsize32", test);
186+
testDynamicVGPRLimits("gfx1200",
187+
"+wavefrontsize32,+dynamic-vgpr-block-size-32", test);
188+
}
189+
190+
/// Checks the occupancy and VGPR limits reported by a GCNSubtarget built for
/// \p CPUName with feature string \p FS.
///
/// \param ExpectedMinOcc   expected lower bound of ST.getWavesPerEU() for a
///                         function with no 'amdgpu-waves-per-eu' attribute.
/// \param ExpectedMaxOcc   expected upper bound of the same range.
/// \param ExpectedMaxVGPRs expected value of both ST.getMaxNumVGPRs(Func) and
///                         ST.getAddressableNumVGPRs().
///
/// Afterwards the function is given 'amdgpu-waves-per-eu'="10,12" and the
/// reported range is expected to be exactly [10, 12].
static void testAbsoluteLimits(StringRef CPUName, StringRef FS,
191+
unsigned ExpectedMinOcc, unsigned ExpectedMaxOcc,
192+
unsigned ExpectedMaxVGPRs) {
193+
auto TM = createAMDGPUTargetMachine("amdgcn-amd-", CPUName, FS);
194+
ASSERT_TRUE(TM) << "No target machine";
195+
196+
GCNSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
197+
std::string(TM->getTargetFeatureString()), *TM);
198+
199+
// Test function without an 'amdgpu-waves-per-eu' attribute; only the calling
// convention and the flat work-group size are set.
200+
LLVMContext Context;
201+
Module M("", Context);
202+
Function *Func =
203+
Function::Create(FunctionType::get(Type::getVoidTy(Context), false),
204+
GlobalValue::ExternalLinkage, "testFunc", &M);
205+
Func->setCallingConv(CallingConv::AMDGPU_CS_Chain);
206+
Func->addFnAttr("amdgpu-flat-work-group-size", "1,32");
207+
208+
auto Range = ST.getWavesPerEU(*Func);
209+
EXPECT_EQ(ExpectedMinOcc, Range.first) << CPUName << ' ' << FS;
210+
EXPECT_EQ(ExpectedMaxOcc, Range.second) << CPUName << ' ' << FS;
211+
EXPECT_EQ(ExpectedMaxVGPRs, ST.getMaxNumVGPRs(*Func)) << CPUName << ' ' << FS;
212+
EXPECT_EQ(ExpectedMaxVGPRs, ST.getAddressableNumVGPRs())
213+
<< CPUName << ' ' << FS;
214+
215+
// Function with requested 'amdgpu-waves-per-eu' in a valid range.
216+
Func->addFnAttr("amdgpu-waves-per-eu", "10,12");
217+
Range = ST.getWavesPerEU(*Func);
218+
EXPECT_EQ(10u, Range.first) << CPUName << ' ' << FS;
219+
EXPECT_EQ(12u, Range.second) << CPUName << ' ' << FS;
220+
}
221+
222+
// Exercises testAbsoluteLimits on gfx1200 (wave32) in three configurations:
// without dynamic VGPRs, with dynamic VGPRs, and with dynamic VGPRs using the
// 32-register block size. All three expect a waves-per-EU range of [1, 16];
// the expected VGPR limit is 256, 128 and 256 respectively.
// NOTE(review): 128 and 256 presumably correspond to 8 blocks of 16 and 8
// blocks of 32 registers -- confirm against getAddressableNumVGPRs.
TEST(AMDGPU, TestOccupancyAbsoluteLimits) {
223+
testAbsoluteLimits("gfx1200", "+wavefrontsize32", 1, 16, 256);
224+
testAbsoluteLimits("gfx1200", "+wavefrontsize32,+dynamic-vgpr", 1, 16, 128);
225+
testAbsoluteLimits(
226+
"gfx1200", "+wavefrontsize32,+dynamic-vgpr,+dynamic-vgpr-block-size-32",
227+
1, 16, 256);
166228
}

llvm/unittests/Target/AMDGPU/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ set(LLVM_LINK_COMPONENTS
1313
Core
1414
GlobalISel
1515
MC
16+
MIRParser
1617
Support
1718
TargetParser
1819
)

0 commit comments

Comments
 (0)