Skip to content

Commit b6b703b

Browse files
authored
AMDGPU: Infer no-agpr usage in AMDGPUAttributor (#85948)
SIMachineFunctionInfo scans the function body for inline asm or callees that may use AGPRs. Move this scan into the attributor so that it actually works interprocedurally. Most of the test churn could probably be avoided if the attribute were not added on subtargets without AGPRs. We should probably also try to delete the MIR scan in usesAGPRs, but that seems trickier to eliminate.
1 parent 1404640 commit b6b703b

26 files changed

+485
-159
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1454,6 +1454,11 @@ The AMDGPU backend supports the following LLVM IR attributes.
14541454
CLANG attribute [CLANG-ATTR]_. Clang only emits this attribute when all
14551455
the three numbers are >= 1.
14561456

1457+
"amdgpu-no-agpr" Indicates the function will not require allocating AGPRs. This is only
1458+
relevant on subtargets with AGPRs. The behavior is undefined if a
1459+
function which requires AGPRs is reached through any function marked
1460+
with this attribute.
1461+
14571462
======================================= ==========================================================
14581463

14591464
Calling Conventions

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 94 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,96 @@ AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
918918
llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
919919
}
920920

921+
/// Returns true if any constraint code parsed from the inline asm \p IA begins
/// with 'a' once an optional leading '{' has been dropped; such a code is
/// treated as an AGPR use. Returns false when no constraint code matches.
/// NOTE(review): presumably this covers both the 'a' register-class
/// constraint and physical "{aN}" registers -- confirm against the AMDGPU
/// inline asm constraint list.
static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
922+
for (const auto &CI : IA->ParseConstraints()) {
923+
for (StringRef Code : CI.Codes) {
924+
// Strip the opening brace (if any) so braced and bare codes are tested alike.
Code.consume_front("{");
925+
if (Code.starts_with("a"))
926+
return true;
927+
}
928+
}
929+
930+
return false;
931+
}
932+
933+
/// Abstract attribute that decides whether a function can be marked with the
/// "amdgpu-no-agpr" string attribute. The boolean state reads as "no AGPRs
/// required"; updateImpl() gives it up as soon as the check over the
/// function's call-like instructions fails.
/// NOTE(review): Attribute::NoUnwind looks like a placeholder attribute kind
/// here, since manifest() emits the "amdgpu-no-agpr" string attribute rather
/// than an enum attribute -- confirm.
struct AAAMDGPUNoAGPR
934+
: public IRAttribute<Attribute::NoUnwind,
935+
StateWrapper<BooleanState, AbstractAttribute>,
936+
AAAMDGPUNoAGPR> {
937+
AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
938+
939+
/// Creates the attribute in the Attributor's allocator. Only function
/// positions are accepted; any other position kind is unreachable.
static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,
940+
Attributor &A) {
941+
if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
942+
return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);
943+
llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position");
944+
}
945+
946+
/// A function that already carries "amdgpu-no-agpr" is settled immediately
/// at the optimistic fixpoint.
void initialize(Attributor &A) override {
947+
Function *F = getAssociatedFunction();
948+
if (F->hasFnAttribute("amdgpu-no-agpr"))
949+
indicateOptimisticFixpoint();
950+
}
951+
952+
/// Printable form of the current assumed state.
const std::string getAsStr(Attributor *A) const override {
953+
return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr";
954+
}
955+
956+
void trackStatistics() const override {}
957+
958+
/// Checks every call-like instruction of the function with CheckForNoAGPRs.
/// If the check does not hold for all of them, the state is fixed
/// pessimistically; otherwise the state is left unchanged.
ChangeStatus updateImpl(Attributor &A) override {
959+
// TODO: Use AACallEdges, but then we need a way to inspect asm edges.
960+
961+
// Returns true when the call-like instruction \p I is known not to need
// AGPRs.
auto CheckForNoAGPRs = [&](Instruction &I) {
962+
const auto &CB = cast<CallBase>(I);
963+
const Value *CalleeOp = CB.getCalledOperand();
964+
const Function *Callee = dyn_cast<Function>(CalleeOp);
965+
// The called operand is not a Function: inline asm is judged by its
// constraints, anything else is treated as possibly needing AGPRs.
if (!Callee) {
966+
if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
967+
return !inlineAsmUsesAGPRs(IA);
968+
return false;
969+
}
970+
971+
// Some intrinsics may use AGPRs, but if we have a choice, we are not
972+
// required to use AGPRs.
973+
if (Callee->isIntrinsic())
974+
return true;
975+
976+
// TODO: Handle callsite attributes
977+
// Defer to the callee's own AAAMDGPUNoAGPR; a missing AA counts as
// "may need AGPRs".
const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
978+
*this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
979+
return CalleeInfo && CalleeInfo->getAssumed();
980+
};
981+
982+
bool UsedAssumedInformation = false;
983+
if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this,
984+
UsedAssumedInformation))
985+
return indicatePessimisticFixpoint();
986+
return ChangeStatus::UNCHANGED;
987+
}
988+
989+
/// Adds the "amdgpu-no-agpr" string attribute at this IR position when the
/// no-AGPR assumption still holds; otherwise changes nothing.
ChangeStatus manifest(Attributor &A) override {
990+
if (!getAssumed())
991+
return ChangeStatus::UNCHANGED;
992+
LLVMContext &Ctx = getAssociatedFunction()->getContext();
993+
return A.manifestAttrs(getIRPosition(),
994+
{Attribute::get(Ctx, "amdgpu-no-agpr")});
995+
}
996+
997+
const std::string getName() const override { return "AAAMDGPUNoAGPR"; }
998+
const char *getIdAddr() const override { return &ID; }
999+
1000+
/// This function should return true if the type of the \p AA is
1001+
/// AAAMDGPUNoAGPR.
1002+
static bool classof(const AbstractAttribute *AA) {
1003+
return (AA->getIdAddr() == &ID);
1004+
}
1005+
1006+
/// The address of ID identifies this attribute type (see getIdAddr() and
/// classof()).
static const char ID;
1007+
};
1008+
1009+
// Out-of-line definition of the type-identity tag declared in the struct.
const char AAAMDGPUNoAGPR::ID = 0;
1010+
9211011
static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
9221012
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
9231013
for (unsigned I = 0;
@@ -946,8 +1036,9 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
9461036
DenseSet<const char *> Allowed(
9471037
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
9481038
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
949-
&AAAMDWavesPerEU::ID, &AACallEdges::ID, &AAPointerInfo::ID,
950-
&AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID});
1039+
&AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
1040+
&AAPointerInfo::ID, &AAPotentialConstantValues::ID,
1041+
&AAUnderlyingObjects::ID});
9511042

9521043
AttributorConfig AC(CGUpdater);
9531044
AC.Allowed = &Allowed;
@@ -963,6 +1054,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
9631054
if (!F.isIntrinsic()) {
9641055
A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
9651056
A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
1057+
A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(F));
9661058
CallingConv::ID CC = F.getCallingConv();
9671059
if (!AMDGPU::isEntryFunctionCC(CC)) {
9681060
A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -748,35 +748,7 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
748748
}
749749

750750
bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
751-
for (const BasicBlock &BB : F) {
752-
for (const Instruction &I : BB) {
753-
const auto *CB = dyn_cast<CallBase>(&I);
754-
if (!CB)
755-
continue;
756-
757-
if (CB->isInlineAsm()) {
758-
const InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledOperand());
759-
for (const auto &CI : IA->ParseConstraints()) {
760-
for (StringRef Code : CI.Codes) {
761-
Code.consume_front("{");
762-
if (Code.starts_with("a"))
763-
return true;
764-
}
765-
}
766-
continue;
767-
}
768-
769-
const Function *Callee =
770-
dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
771-
if (!Callee)
772-
return true;
773-
774-
if (Callee->getIntrinsicID() == Intrinsic::not_intrinsic)
775-
return true;
776-
}
777-
}
778-
779-
return false;
751+
return !F.hasFnAttribute("amdgpu-no-agpr");
780752
}
781753

782754
bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {

llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,9 +233,9 @@ attributes #1 = { nounwind }
233233
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
234234
;.
235235
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
236-
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
237-
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
238-
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
236+
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
237+
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
238+
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
239239
;.
240240
; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
241241
;.

0 commit comments

Comments
 (0)