@@ -918,6 +918,96 @@ AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
918
918
llvm_unreachable (" AAAMDWavesPerEU is only valid for function position" );
919
919
}
920
920
921
+ static bool inlineAsmUsesAGPRs (const InlineAsm *IA) {
922
+ for (const auto &CI : IA->ParseConstraints ()) {
923
+ for (StringRef Code : CI.Codes ) {
924
+ Code.consume_front (" {" );
925
+ if (Code.starts_with (" a" ))
926
+ return true ;
927
+ }
928
+ }
929
+
930
+ return false ;
931
+ }
932
+
933
+ struct AAAMDGPUNoAGPR
934
+ : public IRAttribute<Attribute::NoUnwind,
935
+ StateWrapper<BooleanState, AbstractAttribute>,
936
+ AAAMDGPUNoAGPR> {
937
+ AAAMDGPUNoAGPR (const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
938
+
939
+ static AAAMDGPUNoAGPR &createForPosition (const IRPosition &IRP,
940
+ Attributor &A) {
941
+ if (IRP.getPositionKind () == IRPosition::IRP_FUNCTION)
942
+ return *new (A.Allocator ) AAAMDGPUNoAGPR (IRP, A);
943
+ llvm_unreachable (" AAAMDGPUNoAGPR is only valid for function position" );
944
+ }
945
+
946
+ void initialize (Attributor &A) override {
947
+ Function *F = getAssociatedFunction ();
948
+ if (F->hasFnAttribute (" amdgpu-no-agpr" ))
949
+ indicateOptimisticFixpoint ();
950
+ }
951
+
952
+ const std::string getAsStr (Attributor *A) const override {
953
+ return getAssumed () ? " amdgpu-no-agpr" : " amdgpu-maybe-agpr" ;
954
+ }
955
+
956
+ void trackStatistics () const override {}
957
+
958
+ ChangeStatus updateImpl (Attributor &A) override {
959
+ // TODO: Use AACallEdges, but then we need a way to inspect asm edges.
960
+
961
+ auto CheckForNoAGPRs = [&](Instruction &I) {
962
+ const auto &CB = cast<CallBase>(I);
963
+ const Value *CalleeOp = CB.getCalledOperand ();
964
+ const Function *Callee = dyn_cast<Function>(CalleeOp);
965
+ if (!Callee) {
966
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
967
+ return !inlineAsmUsesAGPRs (IA);
968
+ return false ;
969
+ }
970
+
971
+ // Some intrinsics may use AGPRs, but if we have a choice, we are not
972
+ // required to use AGPRs.
973
+ if (Callee->isIntrinsic ())
974
+ return true ;
975
+
976
+ // TODO: Handle callsite attributes
977
+ const auto *CalleeInfo = A.getAAFor <AAAMDGPUNoAGPR>(
978
+ *this , IRPosition::function (*Callee), DepClassTy::REQUIRED);
979
+ return CalleeInfo && CalleeInfo->getAssumed ();
980
+ };
981
+
982
+ bool UsedAssumedInformation = false ;
983
+ if (!A.checkForAllCallLikeInstructions (CheckForNoAGPRs, *this ,
984
+ UsedAssumedInformation))
985
+ return indicatePessimisticFixpoint ();
986
+ return ChangeStatus::UNCHANGED;
987
+ }
988
+
989
+ ChangeStatus manifest (Attributor &A) override {
990
+ if (!getAssumed ())
991
+ return ChangeStatus::UNCHANGED;
992
+ LLVMContext &Ctx = getAssociatedFunction ()->getContext ();
993
+ return A.manifestAttrs (getIRPosition (),
994
+ {Attribute::get (Ctx, " amdgpu-no-agpr" )});
995
+ }
996
+
997
+ const std::string getName () const override { return " AAAMDGPUNoAGPR" ; }
998
+ const char *getIdAddr () const override { return &ID; }
999
+
1000
+ // / This function should return true if the type of the \p AA is
1001
+ // / AAAMDGPUNoAGPRs
1002
+ static bool classof (const AbstractAttribute *AA) {
1003
+ return (AA->getIdAddr () == &ID);
1004
+ }
1005
+
1006
+ static const char ID;
1007
+ };
1008
+
1009
+ const char AAAMDGPUNoAGPR::ID = 0 ;
1010
+
921
1011
static void addPreloadKernArgHint (Function &F, TargetMachine &TM) {
922
1012
const GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(F);
923
1013
for (unsigned I = 0 ;
@@ -946,8 +1036,9 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
946
1036
DenseSet<const char *> Allowed (
947
1037
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
948
1038
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
949
- &AAAMDWavesPerEU::ID, &AACallEdges::ID, &AAPointerInfo::ID,
950
- &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID});
1039
+ &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
1040
+ &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
1041
+ &AAUnderlyingObjects::ID});
951
1042
952
1043
AttributorConfig AC (CGUpdater);
953
1044
AC.Allowed = &Allowed;
@@ -963,6 +1054,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
963
1054
if (!F.isIntrinsic ()) {
964
1055
A.getOrCreateAAFor <AAAMDAttributes>(IRPosition::function (F));
965
1056
A.getOrCreateAAFor <AAUniformWorkGroupSize>(IRPosition::function (F));
1057
+ A.getOrCreateAAFor <AAAMDGPUNoAGPR>(IRPosition::function (F));
966
1058
CallingConv::ID CC = F.getCallingConv ();
967
1059
if (!AMDGPU::isEntryFunctionCC (CC)) {
968
1060
A.getOrCreateAAFor <AAAMDFlatWorkGroupSize>(IRPosition::function (F));
0 commit comments