Skip to content

[AMDGPU] Fix resource usage information for unnamed functions #115320

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 29 additions & 22 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {

using RIK = MCResourceInfo::ResourceInfoKind;
const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
MCSymbol *FnSym = TM.getSymbol(&F);

auto TryGetMCExprValue = [](const MCExpr *Value, uint64_t &Res) -> bool {
int64_t Val;
Expand All @@ -381,7 +382,7 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
const uint64_t MaxScratchPerWorkitem =
STM.getMaxWaveScratchSize() / STM.getWavefrontSize();
MCSymbol *ScratchSizeSymbol =
RI.getSymbol(F.getName(), RIK::RIK_PrivateSegSize, OutContext);
RI.getSymbol(FnSym->getName(), RIK::RIK_PrivateSegSize, OutContext);
uint64_t ScratchSize;
if (ScratchSizeSymbol->isVariable() &&
TryGetMCExprValue(ScratchSizeSymbol->getVariableValue(), ScratchSize) &&
Expand All @@ -394,7 +395,7 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
// Validate addressable scalar registers (i.e., prior to added implicit
// SGPRs).
MCSymbol *NumSGPRSymbol =
RI.getSymbol(F.getName(), RIK::RIK_NumSGPR, OutContext);
RI.getSymbol(FnSym->getName(), RIK::RIK_NumSGPR, OutContext);
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
!STM.hasSGPRInitBug()) {
unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
Expand All @@ -411,9 +412,9 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
}

MCSymbol *VCCUsedSymbol =
RI.getSymbol(F.getName(), RIK::RIK_UsesVCC, OutContext);
RI.getSymbol(FnSym->getName(), RIK::RIK_UsesVCC, OutContext);
MCSymbol *FlatUsedSymbol =
RI.getSymbol(F.getName(), RIK::RIK_UsesFlatScratch, OutContext);
RI.getSymbol(FnSym->getName(), RIK::RIK_UsesFlatScratch, OutContext);
uint64_t VCCUsed, FlatUsed, NumSgpr;

if (NumSGPRSymbol->isVariable() && VCCUsedSymbol->isVariable() &&
Expand All @@ -440,9 +441,9 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
}

MCSymbol *NumVgprSymbol =
RI.getSymbol(F.getName(), RIK::RIK_NumVGPR, OutContext);
RI.getSymbol(FnSym->getName(), RIK::RIK_NumVGPR, OutContext);
MCSymbol *NumAgprSymbol =
RI.getSymbol(F.getName(), RIK::RIK_NumAGPR, OutContext);
RI.getSymbol(FnSym->getName(), RIK::RIK_NumAGPR, OutContext);
uint64_t NumVgpr, NumAgpr;

MachineModuleInfo &MMI =
Expand Down Expand Up @@ -705,15 +706,20 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
{
using RIK = MCResourceInfo::ResourceInfoKind;
getTargetStreamer()->EmitMCResourceInfo(
RI.getSymbol(MF.getName(), RIK::RIK_NumVGPR, OutContext),
RI.getSymbol(MF.getName(), RIK::RIK_NumAGPR, OutContext),
RI.getSymbol(MF.getName(), RIK::RIK_NumSGPR, OutContext),
RI.getSymbol(MF.getName(), RIK::RIK_PrivateSegSize, OutContext),
RI.getSymbol(MF.getName(), RIK::RIK_UsesVCC, OutContext),
RI.getSymbol(MF.getName(), RIK::RIK_UsesFlatScratch, OutContext),
RI.getSymbol(MF.getName(), RIK::RIK_HasDynSizedStack, OutContext),
RI.getSymbol(MF.getName(), RIK::RIK_HasRecursion, OutContext),
RI.getSymbol(MF.getName(), RIK::RIK_HasIndirectCall, OutContext));
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext),
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumAGPR, OutContext),
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumSGPR, OutContext),
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
OutContext),
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesVCC, OutContext),
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesFlatScratch,
OutContext),
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasDynSizedStack,
OutContext),
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasRecursion,
OutContext),
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasIndirectCall,
OutContext));
}

if (isVerbose()) {
Expand All @@ -726,18 +732,19 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->emitRawComment(" Function info:", false);

emitCommonFunctionComments(
RI.getSymbol(MF.getName(), RIK::RIK_NumVGPR, OutContext)
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext)
->getVariableValue(),
STM.hasMAIInsts()
? RI.getSymbol(MF.getName(), RIK::RIK_NumAGPR, OutContext)
->getVariableValue()
: nullptr,
STM.hasMAIInsts() ? RI.getSymbol(CurrentFnSym->getName(),
RIK::RIK_NumAGPR, OutContext)
->getVariableValue()
: nullptr,
RI.createTotalNumVGPRs(MF, Ctx),
RI.createTotalNumSGPRs(
MF,
MF.getSubtarget<GCNSubtarget>().getTargetID().isXnackOnOrAny(),
Ctx),
RI.getSymbol(MF.getName(), RIK::RIK_PrivateSegSize, OutContext)
RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
OutContext)
->getVariableValue(),
getFunctionCodeSize(MF), MFI);
return false;
Expand Down Expand Up @@ -943,7 +950,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,

auto GetSymRefExpr =
[&](MCResourceInfo::ResourceInfoKind RIK) -> const MCExpr * {
MCSymbol *Sym = RI.getSymbol(MF.getName(), RIK, OutContext);
MCSymbol *Sym = RI.getSymbol(CurrentFnSym->getName(), RIK, OutContext);
return MCSymbolRefExpr::create(Sym, Ctx);
};

Expand Down
36 changes: 25 additions & 11 deletions llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

Expand Down Expand Up @@ -95,6 +96,8 @@ void MCResourceInfo::assignResourceInfoExpr(
int64_t LocalValue, ResourceInfoKind RIK, AMDGPUMCExpr::VariantKind Kind,
const MachineFunction &MF, const SmallVectorImpl<const Function *> &Callees,
MCContext &OutContext) {
const LLVMTargetMachine &TM = MF.getTarget();
MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
const MCConstantExpr *LocalConstExpr =
MCConstantExpr::create(LocalValue, OutContext);
const MCExpr *SymVal = LocalConstExpr;
Expand All @@ -109,12 +112,14 @@ void MCResourceInfo::assignResourceInfoExpr(
for (const Function *Callee : Callees) {
if (!Seen.insert(Callee).second)
continue;
MCSymbol *CalleeValSym = getSymbol(Callee->getName(), RIK, OutContext);
MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
MCSymbol *CalleeValSym =
getSymbol(CalleeFnSym->getName(), RIK, OutContext);
ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
}
SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
}
MCSymbol *Sym = getSymbol(MF.getName(), RIK, OutContext);
MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
Sym->setVariableValue(SymVal);
}

Expand All @@ -133,14 +138,17 @@ void MCResourceInfo::gatherResourceInfo(
addMaxSGPRCandidate(FRI.NumExplicitSGPR);
}

const LLVMTargetMachine &TM = MF.getTarget();
MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());

auto SetMaxReg = [&](MCSymbol *MaxSym, int32_t numRegs,
ResourceInfoKind RIK) {
if (!FRI.HasIndirectCall) {
assignResourceInfoExpr(numRegs, RIK, AMDGPUMCExpr::AGVK_Max, MF,
FRI.Callees, OutContext);
} else {
const MCExpr *SymRef = MCSymbolRefExpr::create(MaxSym, OutContext);
MCSymbol *LocalNumSym = getSymbol(MF.getName(), RIK, OutContext);
MCSymbol *LocalNumSym = getSymbol(FnSym->getName(), RIK, OutContext);
const MCExpr *MaxWithLocal = AMDGPUMCExpr::createMax(
{MCConstantExpr::create(numRegs, OutContext), SymRef}, OutContext);
LocalNumSym->setVariableValue(MaxWithLocal);
Expand All @@ -165,8 +173,9 @@ void MCResourceInfo::gatherResourceInfo(
if (!Seen.insert(Callee).second)
continue;
if (!Callee->isDeclaration()) {
MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
MCSymbol *calleeValSym =
getSymbol(Callee->getName(), RIK_PrivateSegSize, OutContext);
getSymbol(CalleeFnSym->getName(), RIK_PrivateSegSize, OutContext);
ArgExprs.push_back(MCSymbolRefExpr::create(calleeValSym, OutContext));
}
}
Expand All @@ -178,12 +187,12 @@ void MCResourceInfo::gatherResourceInfo(
localConstExpr =
MCBinaryExpr::createAdd(localConstExpr, transitiveExpr, OutContext);
}
getSymbol(MF.getName(), RIK_PrivateSegSize, OutContext)
getSymbol(FnSym->getName(), RIK_PrivateSegSize, OutContext)
->setVariableValue(localConstExpr);
}

auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {
MCSymbol *Sym = getSymbol(MF.getName(), RIK, OutContext);
MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
Sym->setVariableValue(MCConstantExpr::create(LocalValue, OutContext));
};

Expand Down Expand Up @@ -213,18 +222,23 @@ void MCResourceInfo::gatherResourceInfo(

const MCExpr *MCResourceInfo::createTotalNumVGPRs(const MachineFunction &MF,
MCContext &Ctx) {
const LLVMTargetMachine &TM = MF.getTarget();
MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
return AMDGPUMCExpr::createTotalNumVGPR(
getSymRefExpr(MF.getName(), RIK_NumAGPR, Ctx),
getSymRefExpr(MF.getName(), RIK_NumVGPR, Ctx), Ctx);
getSymRefExpr(FnSym->getName(), RIK_NumAGPR, Ctx),
getSymRefExpr(FnSym->getName(), RIK_NumVGPR, Ctx), Ctx);
}

const MCExpr *MCResourceInfo::createTotalNumSGPRs(const MachineFunction &MF,
bool hasXnack,
MCContext &Ctx) {
const LLVMTargetMachine &TM = MF.getTarget();
MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
return MCBinaryExpr::createAdd(
getSymRefExpr(MF.getName(), RIK_NumSGPR, Ctx),
getSymRefExpr(FnSym->getName(), RIK_NumSGPR, Ctx),
AMDGPUMCExpr::createExtraSGPRs(
getSymRefExpr(MF.getName(), RIK_UsesVCC, Ctx),
getSymRefExpr(MF.getName(), RIK_UsesFlatScratch, Ctx), hasXnack, Ctx),
getSymRefExpr(FnSym->getName(), RIK_UsesVCC, Ctx),
getSymRefExpr(FnSym->getName(), RIK_UsesFlatScratch, Ctx), hasXnack,
Ctx),
Ctx);
}
48 changes: 48 additions & 0 deletions llvm/test/CodeGen/AMDGPU/unnamed-function-resource-info.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck %s

; CHECK-LABEL: __unnamed_1:
; CHECK: .set __unnamed_1.num_vgpr, 0
; CHECK: .set __unnamed_1.num_agpr, 0
; CHECK: .set __unnamed_1.numbered_sgpr, 32
; CHECK: .set __unnamed_1.private_seg_size, 0
; CHECK: .set __unnamed_1.uses_vcc, 0
; CHECK: .set __unnamed_1.uses_flat_scratch, 0
; CHECK: .set __unnamed_1.has_dyn_sized_stack, 0
; CHECK: .set __unnamed_1.has_recursion, 0
; CHECK: .set __unnamed_1.has_indirect_call, 0
define void @1() {
entry:
ret void
}

; CHECK-LABEL: __unnamed_2:
; CHECK: .set __unnamed_2.num_vgpr, max(32, __unnamed_1.num_vgpr)
; CHECK: .set __unnamed_2.num_agpr, max(0, __unnamed_1.num_agpr)
; CHECK: .set __unnamed_2.numbered_sgpr, max(34, __unnamed_1.numbered_sgpr)
; CHECK: .set __unnamed_2.private_seg_size, 16+(max(__unnamed_1.private_seg_size))
; CHECK: .set __unnamed_2.uses_vcc, or(0, __unnamed_1.uses_vcc)
; CHECK: .set __unnamed_2.uses_flat_scratch, or(0, __unnamed_1.uses_flat_scratch)
; CHECK: .set __unnamed_2.has_dyn_sized_stack, or(0, __unnamed_1.has_dyn_sized_stack)
; CHECK: .set __unnamed_2.has_recursion, or(1, __unnamed_1.has_recursion)
; CHECK: .set __unnamed_2.has_indirect_call, or(0, __unnamed_1.has_indirect_call)
define void @2() {
entry:
call void @1()
ret void
}

; CHECK-LABEL: {{^}}use
; CHECK: .set use.num_vgpr, max(32, __unnamed_1.num_vgpr, __unnamed_2.num_vgpr)
; CHECK: .set use.num_agpr, max(0, __unnamed_1.num_agpr, __unnamed_2.num_agpr)
; CHECK: .set use.numbered_sgpr, max(33, __unnamed_1.numbered_sgpr, __unnamed_2.numbered_sgpr)
; CHECK: .set use.private_seg_size, 0+(max(__unnamed_1.private_seg_size, __unnamed_2.private_seg_size))
; CHECK: .set use.uses_vcc, or(0, __unnamed_1.uses_vcc, __unnamed_2.uses_vcc)
; CHECK: .set use.uses_flat_scratch, or(1, __unnamed_1.uses_flat_scratch, __unnamed_2.uses_flat_scratch)
; CHECK: .set use.has_dyn_sized_stack, or(0, __unnamed_1.has_dyn_sized_stack, __unnamed_2.has_dyn_sized_stack)
; CHECK: .set use.has_recursion, or(1, __unnamed_1.has_recursion, __unnamed_2.has_recursion)
; CHECK: .set use.has_indirect_call, or(0, __unnamed_1.has_indirect_call, __unnamed_2.has_indirect_call)
define amdgpu_kernel void @use() {
call void @1()
call void @2()
ret void
}
Loading