Skip to content

Commit 78c6218

Browse files
committed
Localize symbols according to function linkage
1 parent 416d3bc commit 78c6218

16 files changed

+420
-418
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 35 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -357,6 +357,7 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) {
357357
}
358358

359359
void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
360+
bool isLocal = F.hasLocalLinkage();
360361
if (F.isDeclaration() || !AMDGPU::isModuleEntryFunctionCC(F.getCallingConv()))
361362
return;
362363

@@ -375,8 +376,8 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
375376

376377
const uint64_t MaxScratchPerWorkitem =
377378
STM.getMaxWaveScratchSize() / STM.getWavefrontSize();
378-
MCSymbol *ScratchSizeSymbol =
379-
RI.getSymbol(FnSym->getName(), RIK::RIK_PrivateSegSize, OutContext);
379+
MCSymbol *ScratchSizeSymbol = RI.getSymbol(
380+
FnSym->getName(), RIK::RIK_PrivateSegSize, OutContext, isLocal);
380381
uint64_t ScratchSize;
381382
if (ScratchSizeSymbol->isVariable() &&
382383
TryGetMCExprValue(ScratchSizeSymbol->getVariableValue(), ScratchSize) &&
@@ -389,7 +390,7 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
389390
// Validate addressable scalar registers (i.e., prior to added implicit
390391
// SGPRs).
391392
MCSymbol *NumSGPRSymbol =
392-
RI.getSymbol(FnSym->getName(), RIK::RIK_NumSGPR, OutContext);
393+
RI.getSymbol(FnSym->getName(), RIK::RIK_NumSGPR, OutContext, isLocal);
393394
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
394395
!STM.hasSGPRInitBug()) {
395396
unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
@@ -406,9 +407,9 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
406407
}
407408

408409
MCSymbol *VCCUsedSymbol =
409-
RI.getSymbol(FnSym->getName(), RIK::RIK_UsesVCC, OutContext);
410-
MCSymbol *FlatUsedSymbol =
411-
RI.getSymbol(FnSym->getName(), RIK::RIK_UsesFlatScratch, OutContext);
410+
RI.getSymbol(FnSym->getName(), RIK::RIK_UsesVCC, OutContext, isLocal);
411+
MCSymbol *FlatUsedSymbol = RI.getSymbol(
412+
FnSym->getName(), RIK::RIK_UsesFlatScratch, OutContext, isLocal);
412413
uint64_t VCCUsed, FlatUsed, NumSgpr;
413414

414415
if (NumSGPRSymbol->isVariable() && VCCUsedSymbol->isVariable() &&
@@ -435,9 +436,9 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
435436
}
436437

437438
MCSymbol *NumVgprSymbol =
438-
RI.getSymbol(FnSym->getName(), RIK::RIK_NumVGPR, OutContext);
439+
RI.getSymbol(FnSym->getName(), RIK::RIK_NumVGPR, OutContext, isLocal);
439440
MCSymbol *NumAgprSymbol =
440-
RI.getSymbol(FnSym->getName(), RIK::RIK_NumAGPR, OutContext);
441+
RI.getSymbol(FnSym->getName(), RIK::RIK_NumAGPR, OutContext, isLocal);
441442
uint64_t NumVgpr, NumAgpr;
442443

443444
MachineModuleInfo &MMI =
@@ -655,6 +656,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
655656

656657
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
657658
MCContext &Context = getObjFileLowering().getContext();
659+
bool isLocal = MF.getFunction().hasLocalLinkage();
658660
// FIXME: This should be an explicit check for Mesa.
659661
if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
660662
MCSectionELF *ConfigSection =
@@ -700,20 +702,24 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
700702
{
701703
using RIK = MCResourceInfo::ResourceInfoKind;
702704
getTargetStreamer()->EmitMCResourceInfo(
703-
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext),
704-
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumAGPR, OutContext),
705-
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumSGPR, OutContext),
705+
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext,
706+
isLocal),
707+
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumAGPR, OutContext,
708+
isLocal),
709+
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumSGPR, OutContext,
710+
isLocal),
706711
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
707-
OutContext),
708-
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesVCC, OutContext),
712+
OutContext, isLocal),
713+
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesVCC, OutContext,
714+
isLocal),
709715
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesFlatScratch,
710-
OutContext),
716+
OutContext, isLocal),
711717
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasDynSizedStack,
712-
OutContext),
713-
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasRecursion,
714-
OutContext),
718+
OutContext, isLocal),
719+
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasRecursion, OutContext,
720+
isLocal),
715721
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasIndirectCall,
716-
OutContext));
722+
OutContext, isLocal));
717723
}
718724

719725
if (isVerbose()) {
@@ -726,19 +732,21 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
726732
OutStreamer->emitRawComment(" Function info:", false);
727733

728734
emitCommonFunctionComments(
729-
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext)
735+
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext,
736+
isLocal)
730737
->getVariableValue(),
731-
STM.hasMAIInsts() ? RI.getSymbol(CurrentFnSym->getName(),
732-
RIK::RIK_NumAGPR, OutContext)
733-
->getVariableValue()
734-
: nullptr,
738+
STM.hasMAIInsts()
739+
? RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumAGPR,
740+
OutContext, isLocal)
741+
->getVariableValue()
742+
: nullptr,
735743
RI.createTotalNumVGPRs(MF, Ctx),
736744
RI.createTotalNumSGPRs(
737745
MF,
738746
MF.getSubtarget<GCNSubtarget>().getTargetID().isXnackOnOrAny(),
739747
Ctx),
740748
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
741-
OutContext)
749+
OutContext, isLocal)
742750
->getVariableValue(),
743751
getFunctionCodeSize(MF), MFI);
744752
return false;
@@ -927,6 +935,7 @@ static const MCExpr *computeAccumOffset(const MCExpr *NumVGPR, MCContext &Ctx) {
927935
void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
928936
const MachineFunction &MF) {
929937
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
938+
bool isLocal = MF.getFunction().hasLocalLinkage();
930939
MCContext &Ctx = MF.getContext();
931940

932941
auto CreateExpr = [&Ctx](int64_t Value) {
@@ -944,7 +953,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
944953

945954
auto GetSymRefExpr =
946955
[&](MCResourceInfo::ResourceInfoKind RIK) -> const MCExpr * {
947-
MCSymbol *Sym = RI.getSymbol(CurrentFnSym->getName(), RIK, OutContext);
956+
MCSymbol *Sym =
957+
RI.getSymbol(CurrentFnSym->getName(), RIK, OutContext, isLocal);
948958
return MCSymbolRefExpr::create(Sym, Ctx);
949959
};
950960

llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp

Lines changed: 32 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -23,11 +23,12 @@
2323
using namespace llvm;
2424

2525
MCSymbol *MCResourceInfo::getSymbol(StringRef FuncName, ResourceInfoKind RIK,
26-
MCContext &OutContext) {
27-
auto GOCS = [FuncName, &OutContext](StringRef Suffix) {
28-
return OutContext.getOrCreateSymbol(
29-
Twine(OutContext.getAsmInfo()->getPrivateGlobalPrefix()) + FuncName +
30-
Twine(Suffix));
26+
MCContext &OutContext, bool isLocal) {
27+
auto GOCS = [FuncName, &OutContext, isLocal](StringRef Suffix) {
28+
StringRef Prefix =
29+
isLocal ? OutContext.getAsmInfo()->getPrivateGlobalPrefix() : "";
30+
return OutContext.getOrCreateSymbol(Twine(Prefix) + FuncName +
31+
Twine(Suffix));
3132
};
3233
switch (RIK) {
3334
case RIK_NumVGPR:
@@ -54,8 +55,8 @@ MCSymbol *MCResourceInfo::getSymbol(StringRef FuncName, ResourceInfoKind RIK,
5455

5556
const MCExpr *MCResourceInfo::getSymRefExpr(StringRef FuncName,
5657
ResourceInfoKind RIK,
57-
MCContext &Ctx) {
58-
return MCSymbolRefExpr::create(getSymbol(FuncName, RIK, Ctx), Ctx);
58+
MCContext &Ctx, bool isLocal) {
59+
return MCSymbolRefExpr::create(getSymbol(FuncName, RIK, Ctx, isLocal), Ctx);
5960
}
6061

6162
void MCResourceInfo::assignMaxRegs(MCContext &OutContext) {
@@ -83,33 +84,28 @@ void MCResourceInfo::finalize(MCContext &OutContext) {
8384
}
8485

8586
MCSymbol *MCResourceInfo::getMaxVGPRSymbol(MCContext &OutContext) {
86-
StringRef PrivatePrefix = OutContext.getAsmInfo()->getPrivateGlobalPrefix();
87-
return OutContext.getOrCreateSymbol(Twine(PrivatePrefix) +
88-
"amdgpu.max_num_vgpr");
87+
return OutContext.getOrCreateSymbol("amdgpu.max_num_vgpr");
8988
}
9089

9190
MCSymbol *MCResourceInfo::getMaxAGPRSymbol(MCContext &OutContext) {
92-
StringRef PrivatePrefix = OutContext.getAsmInfo()->getPrivateGlobalPrefix();
93-
return OutContext.getOrCreateSymbol(Twine(PrivatePrefix) +
94-
"amdgpu.max_num_agpr");
91+
return OutContext.getOrCreateSymbol("amdgpu.max_num_agpr");
9592
}
9693

9794
MCSymbol *MCResourceInfo::getMaxSGPRSymbol(MCContext &OutContext) {
98-
StringRef PrivatePrefix = OutContext.getAsmInfo()->getPrivateGlobalPrefix();
99-
return OutContext.getOrCreateSymbol(Twine(PrivatePrefix) +
100-
"amdgpu.max_num_sgpr");
95+
return OutContext.getOrCreateSymbol("amdgpu.max_num_sgpr");
10196
}
10297

10398
void MCResourceInfo::assignResourceInfoExpr(
10499
int64_t LocalValue, ResourceInfoKind RIK, AMDGPUMCExpr::VariantKind Kind,
105100
const MachineFunction &MF, const SmallVectorImpl<const Function *> &Callees,
106101
MCContext &OutContext) {
107102
const TargetMachine &TM = MF.getTarget();
103+
bool isLocal = MF.getFunction().hasLocalLinkage();
108104
MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
109105
const MCConstantExpr *LocalConstExpr =
110106
MCConstantExpr::create(LocalValue, OutContext);
111107
const MCExpr *SymVal = LocalConstExpr;
112-
MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
108+
MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext, isLocal);
113109
if (!Callees.empty()) {
114110
SmallVector<const MCExpr *, 8> ArgExprs;
115111
SmallPtrSet<const Function *, 8> Seen;
@@ -119,9 +115,10 @@ void MCResourceInfo::assignResourceInfoExpr(
119115
if (!Seen.insert(Callee).second)
120116
continue;
121117

118+
bool isCalleeLocal = Callee->hasLocalLinkage();
122119
MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
123120
MCSymbol *CalleeValSym =
124-
getSymbol(CalleeFnSym->getName(), RIK, OutContext);
121+
getSymbol(CalleeFnSym->getName(), RIK, OutContext, isCalleeLocal);
125122

126123
// Avoid constructing recursive definitions by detecting whether `Sym` is
127124
// found transitively within any of its `CalleeValSym`.
@@ -164,6 +161,7 @@ void MCResourceInfo::gatherResourceInfo(
164161
MCSymbol *MaxVGPRSym = getMaxVGPRSymbol(OutContext);
165162
MCSymbol *MaxAGPRSym = getMaxAGPRSymbol(OutContext);
166163
MCSymbol *MaxSGPRSym = getMaxSGPRSymbol(OutContext);
164+
bool isLocal = MF.getFunction().hasLocalLinkage();
167165

168166
if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())) {
169167
addMaxVGPRCandidate(FRI.NumVGPR);
@@ -181,7 +179,8 @@ void MCResourceInfo::gatherResourceInfo(
181179
FRI.Callees, OutContext);
182180
} else {
183181
const MCExpr *SymRef = MCSymbolRefExpr::create(MaxSym, OutContext);
184-
MCSymbol *LocalNumSym = getSymbol(FnSym->getName(), RIK, OutContext);
182+
MCSymbol *LocalNumSym =
183+
getSymbol(FnSym->getName(), RIK, OutContext, isLocal);
185184
const MCExpr *MaxWithLocal = AMDGPUMCExpr::createMax(
186185
{MCConstantExpr::create(numRegs, OutContext), SymRef}, OutContext);
187186
LocalNumSym->setVariableValue(MaxWithLocal);
@@ -196,7 +195,8 @@ void MCResourceInfo::gatherResourceInfo(
196195
// The expression for private segment size should be: FRI.PrivateSegmentSize
197196
// + max(FRI.Callees, FRI.CalleeSegmentSize)
198197
SmallVector<const MCExpr *, 8> ArgExprs;
199-
MCSymbol *Sym = getSymbol(FnSym->getName(), RIK_PrivateSegSize, OutContext);
198+
MCSymbol *Sym =
199+
getSymbol(FnSym->getName(), RIK_PrivateSegSize, OutContext, isLocal);
200200
if (FRI.CalleeSegmentSize)
201201
ArgExprs.push_back(
202202
MCConstantExpr::create(FRI.CalleeSegmentSize, OutContext));
@@ -207,9 +207,11 @@ void MCResourceInfo::gatherResourceInfo(
207207
if (!Seen.insert(Callee).second)
208208
continue;
209209
if (!Callee->isDeclaration()) {
210+
bool isCalleeLocal = Callee->hasLocalLinkage();
210211
MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
211212
MCSymbol *CalleeValSym =
212-
getSymbol(CalleeFnSym->getName(), RIK_PrivateSegSize, OutContext);
213+
getSymbol(CalleeFnSym->getName(), RIK_PrivateSegSize, OutContext,
214+
isCalleeLocal);
213215

214216
// Avoid constructing recursive definitions by detecting whether `Sym`
215217
// is found transitively within any of its `CalleeValSym`.
@@ -232,7 +234,7 @@ void MCResourceInfo::gatherResourceInfo(
232234
}
233235

234236
auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {
235-
MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
237+
MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext, isLocal);
236238
Sym->setVariableValue(MCConstantExpr::create(LocalValue, OutContext));
237239
};
238240

@@ -264,21 +266,23 @@ const MCExpr *MCResourceInfo::createTotalNumVGPRs(const MachineFunction &MF,
264266
MCContext &Ctx) {
265267
const TargetMachine &TM = MF.getTarget();
266268
MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
269+
bool isLocal = MF.getFunction().hasLocalLinkage();
267270
return AMDGPUMCExpr::createTotalNumVGPR(
268-
getSymRefExpr(FnSym->getName(), RIK_NumAGPR, Ctx),
269-
getSymRefExpr(FnSym->getName(), RIK_NumVGPR, Ctx), Ctx);
271+
getSymRefExpr(FnSym->getName(), RIK_NumAGPR, Ctx, isLocal),
272+
getSymRefExpr(FnSym->getName(), RIK_NumVGPR, Ctx, isLocal), Ctx);
270273
}
271274

272275
const MCExpr *MCResourceInfo::createTotalNumSGPRs(const MachineFunction &MF,
273276
bool hasXnack,
274277
MCContext &Ctx) {
275278
const TargetMachine &TM = MF.getTarget();
276279
MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
280+
bool isLocal = MF.getFunction().hasLocalLinkage();
277281
return MCBinaryExpr::createAdd(
278-
getSymRefExpr(FnSym->getName(), RIK_NumSGPR, Ctx),
282+
getSymRefExpr(FnSym->getName(), RIK_NumSGPR, Ctx, isLocal),
279283
AMDGPUMCExpr::createExtraSGPRs(
280-
getSymRefExpr(FnSym->getName(), RIK_UsesVCC, Ctx),
281-
getSymRefExpr(FnSym->getName(), RIK_UsesFlatScratch, Ctx), hasXnack,
282-
Ctx),
284+
getSymRefExpr(FnSym->getName(), RIK_UsesVCC, Ctx, isLocal),
285+
getSymRefExpr(FnSym->getName(), RIK_UsesFlatScratch, Ctx, isLocal),
286+
hasXnack, Ctx),
283287
Ctx);
284288
}

llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -71,9 +71,9 @@ class MCResourceInfo {
7171
}
7272

7373
MCSymbol *getSymbol(StringRef FuncName, ResourceInfoKind RIK,
74-
MCContext &OutContext);
74+
MCContext &OutContext, bool isLocal);
7575
const MCExpr *getSymRefExpr(StringRef FuncName, ResourceInfoKind RIK,
76-
MCContext &Ctx);
76+
MCContext &Ctx, bool isLocal);
7777

7878
void reset();
7979

llvm/test/CodeGen/AMDGPU/agpr-register-count.ll

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -154,28 +154,28 @@ bb:
154154
declare void @undef_func()
155155

156156
; GCN-LABEL: {{^}}kernel_call_undef_func:
157-
; GCN: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0)
158-
; GFX90A: .amdhsa_accum_offset ((((((alignto(max(1, .Lkernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
159-
; GCN: .set .Lkernel_call_undef_func.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
160-
; GCN: .set .Lkernel_call_undef_func.num_agpr, max(0, .Lamdgpu.max_num_agpr)
161-
; GCN: NumVgprs: .Lkernel_call_undef_func.num_vgpr
162-
; GCN: NumAgprs: .Lkernel_call_undef_func.num_agpr
163-
; GCN: TotalNumVgprs: totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr)
164-
; GFX908: VGPRBlocks: ((alignto(max(max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0), 1), 4))/4)-1
165-
; GFX90A: VGPRBlocks: ((alignto(max(max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0), 1), 8))/8)-1
166-
; GCN: NumVGPRsForWavesPerEU: max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0)
167-
; GFX90A: AccumOffset: ((((alignto(max(1, .Lkernel_call_undef_func.num_vgpr), 4))/4)-1)+1)*4
168-
; GFX908: Occupancy: occupancy(10, 4, 256, 8, 10, max(.Lkernel_call_undef_func.numbered_sgpr+(extrasgprs(.Lkernel_call_undef_func.uses_vcc, .Lkernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0))
169-
; GFX90A: Occupancy: occupancy(8, 8, 512, 8, 8, max(.Lkernel_call_undef_func.numbered_sgpr+(extrasgprs(.Lkernel_call_undef_func.uses_vcc, .Lkernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0))
170-
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: ((((alignto(max(1, .Lkernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63
157+
; GCN: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0)
158+
; GFX90A: .amdhsa_accum_offset ((((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
159+
; GCN: .set kernel_call_undef_func.num_vgpr, max(32, amdgpu.max_num_vgpr)
160+
; GCN: .set kernel_call_undef_func.num_agpr, max(0, amdgpu.max_num_agpr)
161+
; GCN: NumVgprs: kernel_call_undef_func.num_vgpr
162+
; GCN: NumAgprs: kernel_call_undef_func.num_agpr
163+
; GCN: TotalNumVgprs: totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr)
164+
; GFX908: VGPRBlocks: ((alignto(max(max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0), 1), 4))/4)-1
165+
; GFX90A: VGPRBlocks: ((alignto(max(max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0), 1), 8))/8)-1
166+
; GCN: NumVGPRsForWavesPerEU: max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0)
167+
; GFX90A: AccumOffset: ((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)+1)*4
168+
; GFX908: Occupancy: occupancy(10, 4, 256, 8, 10, max(kernel_call_undef_func.numbered_sgpr+(extrasgprs(kernel_call_undef_func.uses_vcc, kernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0))
169+
; GFX90A: Occupancy: occupancy(8, 8, 512, 8, 8, max(kernel_call_undef_func.numbered_sgpr+(extrasgprs(kernel_call_undef_func.uses_vcc, kernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0))
170+
; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: ((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63
171171
define amdgpu_kernel void @kernel_call_undef_func() #0 {
172172
bb:
173173
call void @undef_func()
174174
ret void
175175
}
176176

177-
; GCN: .set .Lamdgpu.max_num_vgpr, 32
178-
; GCN-NEXT: .set .Lamdgpu.max_num_agpr, 32
179-
; GCN-NEXT: .set .Lamdgpu.max_num_sgpr, 34
177+
; GCN: .set amdgpu.max_num_vgpr, 32
178+
; GCN-NEXT: .set amdgpu.max_num_agpr, 32
179+
; GCN-NEXT: .set amdgpu.max_num_sgpr, 34
180180

181181
attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }

llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -60,9 +60,9 @@ bb:
6060
declare void @undef_func()
6161

6262
; CHECK: .type kernel_call_undef_func
63-
; CHECK: .set .Lkernel_call_undef_func.num_agpr, max(0, .Lamdgpu.max_num_agpr)
64-
; CHECK: NumAgprs: .Lkernel_call_undef_func.num_agpr
65-
; CHECK: .set .Lamdgpu.max_num_agpr, 32
63+
; CHECK: .set kernel_call_undef_func.num_agpr, max(0, amdgpu.max_num_agpr)
64+
; CHECK: NumAgprs: kernel_call_undef_func.num_agpr
65+
; CHECK: .set amdgpu.max_num_agpr, 32
6666
define amdgpu_kernel void @kernel_call_undef_func() #0 {
6767
bb:
6868
call void @undef_func()

0 commit comments

Comments
 (0)