Skip to content

Commit e962082

Browse files
committed
[WIP][AMDGPU][Attributor] Infer inreg attribute in AMDGPUAttributor
1 parent ede40da commit e962082

File tree

5 files changed

+377
-4
lines changed

5 files changed

+377
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
1212

1313
#include "llvm/CodeGen/MachinePassManager.h"
14+
#include "llvm/IR/CallingConv.h"
1415
#include "llvm/IR/PassManager.h"
1516
#include "llvm/Pass.h"
1617
#include "llvm/Support/AMDGPUAddrSpace.h"

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 106 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "GCNSubtarget.h"
1515
#include "Utils/AMDGPUBaseInfo.h"
1616
#include "llvm/Analysis/CycleAnalysis.h"
17+
#include "llvm/Analysis/UniformityAnalysis.h"
1718
#include "llvm/CodeGen/TargetPassConfig.h"
1819
#include "llvm/IR/IntrinsicsAMDGPU.h"
1920
#include "llvm/IR/IntrinsicsR600.h"
@@ -1014,6 +1015,104 @@ struct AAAMDGPUNoAGPR
10141015

10151016
const char AAAMDGPUNoAGPR::ID = 0;
10161017

1018+
struct AAAMDGPUUniform
1019+
: public IRAttribute<Attribute::InReg,
1020+
StateWrapper<BooleanState, AbstractAttribute>,
1021+
AAAMDGPUUniform> {
1022+
AAAMDGPUUniform(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
1023+
1024+
/// Create an abstract attribute view for the position \p IRP.
1025+
static AAAMDGPUUniform &createForPosition(const IRPosition &IRP,
1026+
Attributor &A);
1027+
1028+
/// See AbstractAttribute::getName()
1029+
const std::string getName() const override { return "AAAMDGPUUniform"; }
1030+
1031+
const std::string getAsStr(Attributor *A) const override {
1032+
return getAssumed() ? "inreg" : "non-inreg";
1033+
}
1034+
1035+
void trackStatistics() const override {}
1036+
1037+
/// See AbstractAttribute::getIdAddr()
1038+
const char *getIdAddr() const override { return &ID; }
1039+
1040+
/// This function should return true if the type of the \p AA is
1041+
/// AAAMDGPUUniform
1042+
static bool classof(const AbstractAttribute *AA) {
1043+
return (AA->getIdAddr() == &ID);
1044+
}
1045+
1046+
/// Unique ID (due to the unique address)
1047+
static const char ID;
1048+
};
1049+
1050+
// Out-of-class definition of the ID tag; getIdAddr() and classof() only use
// its address, so the value 0 itself carries no meaning.
const char AAAMDGPUUniform::ID = 0;
1051+
1052+
namespace {
1053+
1054+
struct AAAMDGPUUniformArgument : public AAAMDGPUUniform {
1055+
AAAMDGPUUniformArgument(const IRPosition &IRP, Attributor &A)
1056+
: AAAMDGPUUniform(IRP, A) {}
1057+
1058+
void initialize(Attributor &A) override {
1059+
assert(
1060+
!AMDGPU::isEntryFunctionCC(getAssociatedFunction()->getCallingConv()));
1061+
if (getAssociatedArgument()->hasAttribute(Attribute::InReg))
1062+
indicateOptimisticFixpoint();
1063+
}
1064+
1065+
ChangeStatus updateImpl(Attributor &A) override {
1066+
unsigned ArgNo = getAssociatedArgument()->getArgNo();
1067+
1068+
auto isUniform = [&](AbstractCallSite ACS) -> bool {
1069+
CallBase *CB = ACS.getInstruction();
1070+
Value *V = CB->getArgOperandUse(ArgNo);
1071+
if (isa<Constant>(V))
1072+
return true;
1073+
if (auto *I = dyn_cast<Instruction>(V)) {
1074+
auto *AU = A.getInfoCache()
1075+
.getAnalysisResultForFunction<UniformityInfoAnalysis>(
1076+
*I->getFunction());
1077+
return AU && AU->isUniform(I);
1078+
}
1079+
if (auto *Arg = dyn_cast<Argument>(V)) {
1080+
// We assume all entry point arguments are uniform.
1081+
if (AMDGPU::isEntryFunctionCC(Arg->getParent()->getCallingConv()))
1082+
return true;
1083+
auto *AA =
1084+
A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(*Arg));
1085+
return AA && AA->isValidState();
1086+
}
1087+
return false;
1088+
};
1089+
1090+
bool UsedAssumedInformation = true;
1091+
if (!A.checkForAllCallSites(isUniform, *this, /*RequireAllCallSites=*/true,
1092+
UsedAssumedInformation))
1093+
return indicatePessimisticFixpoint();
1094+
1095+
if (!UsedAssumedInformation)
1096+
return indicateOptimisticFixpoint();
1097+
1098+
return ChangeStatus::UNCHANGED;
1099+
}
1100+
};
1101+
1102+
} // namespace
1103+
1104+
/// Construct the AAAMDGPUUniform implementation for \p IRP in the Attributor's
/// allocator. Only argument positions are handled; every other position kind
/// reaches llvm_unreachable.
AAAMDGPUUniform &AAAMDGPUUniform::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  // TODO: Since inreg is also allowed for return value, maybe we need to add
  // AAAMDGPUUniformCallSiteReturned?
  if (IRP.getPositionKind() == IRPosition::IRP_ARGUMENT)
    return *new (A.Allocator) AAAMDGPUUniformArgument(IRP, A);

  llvm_unreachable("not a valid position for AAAMDGPUUniform");
}
1115+
10171116
static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
10181117
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
10191118
for (unsigned I = 0;
@@ -1046,7 +1145,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
10461145
&AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
10471146
&AAPointerInfo::ID, &AAPotentialConstantValues::ID,
10481147
&AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1049-
&AAInstanceInfo::ID});
1148+
&AAInstanceInfo::ID, &AAAMDGPUUniform::ID});
10501149

10511150
AttributorConfig AC(CGUpdater);
10521151
AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1090,6 +1189,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
10901189
IRPosition::value(*SI->getPointerOperand()));
10911190
}
10921191
}
1192+
1193+
if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
1194+
for (auto &Arg : F.args())
1195+
A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(Arg));
1196+
}
10931197
}
10941198

10951199
ChangeStatus Change = A.run();
@@ -1118,6 +1222,7 @@ class AMDGPUAttributorLegacy : public ModulePass {
11181222

11191223
/// Declares the analyses this pass requires: CycleInfoWrapperPass and
/// UniformityInfoWrapperPass.
void getAnalysisUsage(AnalysisUsage &AU) const override {
11201224
AU.addRequired<CycleInfoWrapperPass>();
1225+
AU.addRequired<UniformityInfoWrapperPass>();
11211226
}
11221227

11231228
StringRef getPassName() const override { return "AMDGPU Attributor"; }

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
define internal fastcc void @foo(ptr %kg) {
1010
; CHECK-LABEL: define internal fastcc void @foo(
11-
; CHECK-SAME: ptr [[KG:%.*]]) #[[ATTR0:[0-9]+]] {
11+
; CHECK-SAME: ptr inreg [[KG:%.*]]) #[[ATTR0:[0-9]+]] {
1212
; CHECK-NEXT: [[ENTRY:.*:]]
1313
; CHECK-NEXT: [[CLOSURE_I25_I:%.*]] = getelementptr i8, ptr [[KG]], i64 336
1414
; CHECK-NEXT: [[NUM_CLOSURE_I26_I:%.*]] = getelementptr i8, ptr [[KG]], i64 276

0 commit comments

Comments
 (0)