Skip to content

AMDGPU: Infer no-agpr usage in AMDGPUAttributor #85948

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/docs/AMDGPUUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1454,6 +1454,11 @@ The AMDGPU backend supports the following LLVM IR attributes.
CLANG attribute [CLANG-ATTR]_. Clang only emits this attribute when all
the three numbers are >= 1.

"amdgpu-no-agpr" Indicates the function will not require allocating AGPRs. This is only
relevant on subtargets with AGPRs. The behavior is undefined if a
function which requires AGPRs is reached through any function marked
with this attribute.

======================================= ==========================================================

Calling Conventions
Expand Down
96 changes: 94 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -918,6 +918,96 @@ AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
}

static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
for (const auto &CI : IA->ParseConstraints()) {
for (StringRef Code : CI.Codes) {
Code.consume_front("{");
if (Code.starts_with("a"))
return true;
}
}

return false;
}

/// Abstract attribute that decides whether a function can be tagged with the
/// "amdgpu-no-agpr" string attribute.
///
/// The wrapped BooleanState's assumed value is what getAsStr() reports and what
/// manifest() acts on. updateImpl() gives up (pessimistic fixpoint) as soon as
/// one call-like instruction in the function is not known to be AGPR-free.
///
/// NOTE(review): Attribute::NoUnwind is passed as the IRAttribute kind even
/// though the attribute manifested below is the string "amdgpu-no-agpr";
/// presumably it is only a placeholder kind -- confirm.
struct AAAMDGPUNoAGPR
    : public IRAttribute<Attribute::NoUnwind,
                         StateWrapper<BooleanState, AbstractAttribute>,
                         AAAMDGPUNoAGPR> {
  AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

  /// Create an instance in the Attributor's allocator. Only function
  /// positions are accepted; any other position kind is unreachable.
  static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,
                                           Attributor &A) {
    if (IRP.getPositionKind() != IRPosition::IRP_FUNCTION)
      llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position");
    return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);
  }

  /// A function that already carries "amdgpu-no-agpr" is settled immediately
  /// via an optimistic fixpoint.
  void initialize(Attributor &A) override {
    const bool AlreadyMarked =
        getAssociatedFunction()->hasFnAttribute("amdgpu-no-agpr");
    if (AlreadyMarked)
      indicateOptimisticFixpoint();
  }

  /// Human-readable form of the current assumed state.
  const std::string getAsStr(Attributor *A) const override {
    if (getAssumed())
      return "amdgpu-no-agpr";
    return "amdgpu-maybe-agpr";
  }

  void trackStatistics() const override {}

  /// Visit every call-like instruction of the function. The state is left
  /// untouched while all of them pass the check below; otherwise it is driven
  /// to the pessimistic fixpoint.
  ChangeStatus updateImpl(Attributor &A) override {
    // TODO: Use AACallEdges, but then we need a way to inspect asm edges.

    auto CallNeedsNoAGPRs = [&](Instruction &Inst) -> bool {
      const Value *Target = cast<CallBase>(Inst).getCalledOperand();

      // Inline asm is judged purely by its constraint codes.
      if (const auto *Asm = dyn_cast<InlineAsm>(Target))
        return !inlineAsmUsesAGPRs(Asm);

      // Any other callee that is not a plain Function is rejected.
      const auto *Callee = dyn_cast<Function>(Target);
      if (!Callee)
        return false;

      // Some intrinsics may use AGPRs, but if we have a choice, we are not
      // required to use AGPRs.
      if (Callee->isIntrinsic())
        return true;

      // TODO: Handle callsite attributes
      // Otherwise defer to the callee's own AAAMDGPUNoAGPR state; a missing
      // AA counts as "may need AGPRs".
      const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
      return CalleeInfo && CalleeInfo->getAssumed();
    };

    bool UsedAssumedInformation = false;
    const bool AllCallsAreFine = A.checkForAllCallLikeInstructions(
        CallNeedsNoAGPRs, *this, UsedAssumedInformation);
    if (AllCallsAreFine)
      return ChangeStatus::UNCHANGED;
    return indicatePessimisticFixpoint();
  }

  /// Attach "amdgpu-no-agpr" to the function when the assumed state is still
  /// true; otherwise leave the IR unchanged.
  ChangeStatus manifest(Attributor &A) override {
    if (getAssumed()) {
      LLVMContext &Ctx = getAssociatedFunction()->getContext();
      return A.manifestAttrs(getIRPosition(),
                             {Attribute::get(Ctx, "amdgpu-no-agpr")});
    }
    return ChangeStatus::UNCHANGED;
  }

  const std::string getName() const override { return "AAAMDGPUNoAGPR"; }
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDGPUNoAGPR
  static bool classof(const AbstractAttribute *AA) {
    return &ID == AA->getIdAddr();
  }

  /// Identity tag; only its address is compared (see getIdAddr/classof).
  static const char ID;
};

// Out-of-line definition of the identity tag. getIdAddr() and classof() only
// use the address of this object.
const char AAAMDGPUNoAGPR::ID = 0;

static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
for (unsigned I = 0;
Expand Down Expand Up @@ -946,8 +1036,9 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
DenseSet<const char *> Allowed(
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
&AAAMDWavesPerEU::ID, &AACallEdges::ID, &AAPointerInfo::ID,
&AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID});
&AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
&AAPointerInfo::ID, &AAPotentialConstantValues::ID,
&AAUnderlyingObjects::ID});

AttributorConfig AC(CGUpdater);
AC.Allowed = &Allowed;
Expand All @@ -963,6 +1054,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
if (!F.isIntrinsic()) {
A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(F));
CallingConv::ID CC = F.getCallingConv();
if (!AMDGPU::isEntryFunctionCC(CC)) {
A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
Expand Down
30 changes: 1 addition & 29 deletions llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -748,35 +748,7 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
}

/// Report whether \p F may need AGPRs.
///
/// The answer comes solely from the "amdgpu-no-agpr" function attribute: a
/// function carrying it is reported as not using AGPRs, and every other
/// function is conservatively reported as possibly using them. The previous
/// per-instruction scan over calls and inline asm constraints is gone; this
/// change drops that scan (29 deleted lines) in favour of the single
/// attribute query.
///
/// \returns false only when \p F has the "amdgpu-no-agpr" attribute.
bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
  return !F.hasFnAttribute("amdgpu-no-agpr");
}

bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,9 @@ attributes #1 = { nounwind }
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
;.
; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
;.
Expand Down
Loading