-
Notifications
You must be signed in to change notification settings - Fork 14.3k
NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager #102645
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager #102645
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesThis was much more difficult than I anticipated. The pass is The NewPM path uses a ModulePass; I'm not sure if we should be Full diff: https://github.com/llvm/llvm-project/pull/102645.diff 7 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 195e2a19214e8..5b8d37a8ae794 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -209,8 +209,8 @@ extern char &SIPreAllocateWWMRegsID;
void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
extern char &AMDGPUImageIntrinsicOptimizerID;
-void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
-extern char &AMDGPUPerfHintAnalysisID;
+void initializeAMDGPUPerfHintAnalysisLegacyPass(PassRegistry &);
+extern char &AMDGPUPerfHintAnalysisLegacyID;
void initializeGCNRegPressurePrinterPass(PassRegistry &);
extern char &GCNRegPressurePrinterID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 8579774f52230..bbb4573655ab7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -102,7 +102,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel",
"AMDGPU DAG->DAG Pattern Instruction Selection", false,
false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
-INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysisLegacy)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
#ifdef EXPENSIVE_CHECKS
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index b6a6c33d85f83..23fb1dba7b084 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -22,6 +22,7 @@ MODULE_PASS("amdgpu-lower-buffer-fat-pointers",
AMDGPULowerBufferFatPointersPass(*this))
MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass())
MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this))
+MODULE_PASS("amdgpu-perf-hint", AMDGPUPerfHintAnalysisPass(*static_cast<const GCNTargetMachine *>(this)))
MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass())
MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass())
#undef MODULE_PASS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
index 1213d5e0b41db..5797f02cb374e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -12,12 +12,15 @@
///
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
+#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -54,12 +57,6 @@ static cl::opt<unsigned>
STATISTIC(NumMemBound, "Number of functions marked as memory bound");
STATISTIC(NumLimitWave, "Number of functions marked as needing limit wave");
-char llvm::AMDGPUPerfHintAnalysis::ID = 0;
-char &llvm::AMDGPUPerfHintAnalysisID = AMDGPUPerfHintAnalysis::ID;
-
-INITIALIZE_PASS(AMDGPUPerfHintAnalysis, DEBUG_TYPE,
- "Analysis if a function is memory bound", true, true)
-
namespace {
struct AMDGPUPerfHint {
@@ -67,7 +64,7 @@ struct AMDGPUPerfHint {
public:
AMDGPUPerfHint(AMDGPUPerfHintAnalysis::FuncInfoMap &FIM_,
- const TargetLowering *TLI_)
+ const SITargetLowering *TLI_)
: FIM(FIM_), TLI(TLI_) {}
bool runOnFunction(Function &F);
@@ -97,7 +94,7 @@ struct AMDGPUPerfHint {
const DataLayout *DL = nullptr;
- const TargetLowering *TLI;
+ const SITargetLowering *TLI;
AMDGPUPerfHintAnalysis::FuncInfo *visit(const Function &F);
static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);
@@ -388,23 +385,52 @@ bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
<< Reference.print() << "Result:" << Result << '\n');
return Result;
}
+
+class AMDGPUPerfHintAnalysisLegacy : public CallGraphSCCPass {
+private:
+ // FIXME: This is relying on maintaining state between different SCCs.
+ AMDGPUPerfHintAnalysis Impl;
+
+public:
+ static char ID;
+
+ AMDGPUPerfHintAnalysisLegacy() : CallGraphSCCPass(ID) {}
+
+ bool runOnSCC(CallGraphSCC &SCC) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+};
+
} // namespace
-bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC)
+bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
+ auto FI = FIM.find(F);
+ if (FI == FIM.end())
return false;
- const TargetMachine &TM = TPC->getTM<TargetMachine>();
+ return AMDGPUPerfHint::isMemBound(FI->second);
+}
+
+bool AMDGPUPerfHintAnalysis::needsWaveLimiter(const Function *F) const {
+ auto FI = FIM.find(F);
+ if (FI == FIM.end())
+ return false;
+
+ return AMDGPUPerfHint::needLimitWave(FI->second);
+}
+bool AMDGPUPerfHintAnalysis::runOnSCC(const GCNTargetMachine &TM,
+ CallGraphSCC &SCC) {
bool Changed = false;
for (CallGraphNode *I : SCC) {
Function *F = I->getFunction();
if (!F || F->isDeclaration())
continue;
- const TargetSubtargetInfo *ST = TM.getSubtargetImpl(*F);
- AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
+ AMDGPUPerfHint Analyzer(FIM, ST.getTargetLowering());
if (Analyzer.runOnFunction(*F))
Changed = true;
@@ -413,18 +439,57 @@ bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
return Changed;
}
-bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
- auto FI = FIM.find(F);
- if (FI == FIM.end())
- return false;
+bool AMDGPUPerfHintAnalysis::run(const GCNTargetMachine &TM,
+ LazyCallGraph &CG) {
- return AMDGPUPerfHint::isMemBound(FI->second);
+ SmallVector<Function *, 16> Worklist;
+ CG.buildRefSCCs();
+ for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
+ for (LazyCallGraph::SCC &SCC : RC) {
+ if (SCC.size() != 1)
+ continue;
+ Function &F = SCC.begin()->getFunction();
+ if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage())
+ Worklist.push_back(&F);
+ }
+ }
+
+ bool Changed = false;
+ for (auto *F : llvm::reverse(Worklist)) {
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
+ AMDGPUPerfHint Analyzer(FIM, ST.getTargetLowering());
+
+ if (Analyzer.runOnFunction(*F))
+ Changed = true;
+ }
+
+ return Changed;
}
-bool AMDGPUPerfHintAnalysis::needsWaveLimiter(const Function *F) const {
- auto FI = FIM.find(F);
- if (FI == FIM.end())
+char AMDGPUPerfHintAnalysisLegacy::ID = 0;
+char &llvm::AMDGPUPerfHintAnalysisLegacyID = AMDGPUPerfHintAnalysisLegacy::ID;
+
+INITIALIZE_PASS(AMDGPUPerfHintAnalysisLegacy, DEBUG_TYPE,
+ "Analysis if a function is memory bound", true, true)
+
+bool AMDGPUPerfHintAnalysisLegacy::runOnSCC(CallGraphSCC &SCC) {
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
return false;
- return AMDGPUPerfHint::needLimitWave(FI->second);
+ const GCNTargetMachine &TM = TPC->getTM<GCNTargetMachine>();
+ return Impl.runOnSCC(TM, SCC);
+}
+
+PreservedAnalyses AMDGPUPerfHintAnalysisPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ auto &CG = AM.getResult<LazyCallGraphAnalysis>(M);
+
+ bool Changed = Impl->run(TM, CG);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<LazyCallGraphAnalysis>();
+ return PA;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
index 2db8db6957cee..cf2ab82537800 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
@@ -12,35 +12,29 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
-#define LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPERFHINTANALYSIS_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPERFHINTANALYSIS_H
-#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueMap.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+
namespace llvm {
-struct AMDGPUPerfHintAnalysis : public CallGraphSCCPass {
- static char ID;
+class AMDGPUPerfHintAnalysis;
+class CallGraphSCC;
+class GCNTargetMachine;
+class LazyCallGraph;
+class AMDGPUPerfHintAnalysis {
public:
- AMDGPUPerfHintAnalysis() : CallGraphSCCPass(ID) {}
-
- bool runOnSCC(CallGraphSCC &SCC) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
-
- bool isMemoryBound(const Function *F) const;
-
- bool needsWaveLimiter(const Function *F) const;
-
struct FuncInfo {
unsigned MemInstCost;
unsigned InstCost;
- unsigned IAMInstCost; // Indirect access memory instruction count
- unsigned LSMInstCost; // Large stride memory instruction count
+ unsigned IAMInstCost; // Indirect access memory instruction count
+ unsigned LSMInstCost; // Large stride memory instruction count
bool HasDenseGlobalMemAcc; // Set if at least 1 basic block has relatively
// high global memory access
FuncInfo()
@@ -48,11 +42,35 @@ struct AMDGPUPerfHintAnalysis : public CallGraphSCCPass {
HasDenseGlobalMemAcc(false) {}
};
- typedef ValueMap<const Function*, FuncInfo> FuncInfoMap;
+ typedef ValueMap<const Function *, FuncInfo> FuncInfoMap;
private:
-
FuncInfoMap FIM;
+
+public:
+ AMDGPUPerfHintAnalysis() {}
+
+ // OldPM
+ bool runOnSCC(const GCNTargetMachine &TM, CallGraphSCC &SCC);
+
+ // NewPM
+ bool run(const GCNTargetMachine &TM, LazyCallGraph &CG);
+
+ bool isMemoryBound(const Function *F) const;
+
+ bool needsWaveLimiter(const Function *F) const;
};
+
+struct AMDGPUPerfHintAnalysisPass
+ : public PassInfoMixin<AMDGPUPerfHintAnalysisPass> {
+ const GCNTargetMachine &TM;
+ std::unique_ptr<AMDGPUPerfHintAnalysis> Impl;
+
+ AMDGPUPerfHintAnalysisPass(const GCNTargetMachine &TM)
+ : TM(TM), Impl(std::make_unique<AMDGPUPerfHintAnalysis>()) {}
+
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
} // namespace llvm
-#endif // LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUPERFHINTANALYSIS_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e80daff96c431..676b0b5046b24 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -21,6 +21,7 @@
#include "AMDGPUIGroupLP.h"
#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPUMacroFusion.h"
+#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegBankSelect.h"
#include "AMDGPUSplitModule.h"
#include "AMDGPUTargetObjectFile.h"
@@ -1230,7 +1231,7 @@ bool GCNPassConfig::addPreISel() {
addPass(createLCSSAPass());
if (TM->getOptLevel() > CodeGenOptLevel::Less)
- addPass(&AMDGPUPerfHintAnalysisID);
+ addPass(&AMDGPUPerfHintAnalysisLegacyID);
return false;
}
diff --git a/llvm/test/CodeGen/AMDGPU/perfhint.ll b/llvm/test/CodeGen/AMDGPU/perfhint.ll
index 77e0f46a3d457..f4ee4fb82e7a3 100644
--- a/llvm/test/CodeGen/AMDGPU/perfhint.ll
+++ b/llvm/test/CodeGen/AMDGPU/perfhint.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-perf-hint < %s | FileCheck -check-prefix=CHECK %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-perf-hint < %s | FileCheck -check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}test_membound:
|
@@ -22,6 +22,7 @@ MODULE_PASS("amdgpu-lower-buffer-fat-pointers", | |||
AMDGPULowerBufferFatPointersPass(*this)) | |||
MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass()) | |||
MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this)) | |||
MODULE_PASS("amdgpu-perf-hint", AMDGPUPerfHintAnalysisPass(*static_cast<const GCNTargetMachine *>(this))) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Exceeds 80 chars per line.
if (SCC.size() != 1) | ||
continue; | ||
Function &F = SCC.begin()->getFunction(); | ||
if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is it limited to internal linkage?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Copied from FunctionAttrs, but I think this meant to really be checking for interposable linkage (and I thought I dropped this before posting?)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually this broke the test
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nevermind, I somehow posted the wrong version that has the reverse list which is wrong
95c809d
to
a9c57cd
Compare
353885d
to
a24983c
Compare
a24983c
to
0fa3fea
Compare
7feda95
to
13dd6d2
Compare
This was much more difficult than I anticipated. The pass is not in a good state, with poor test coverage. The legacy PM does seem to be relying on maintaining the map state between different SCCs, which seems bad. The pass is going out of its way to avoid putting the attributes it introduces onto non-callee functions. If it just added them, we could use them directly instead of relying on the map, I would think. The NewPM path uses a ModulePass; I'm not sure if we should be using CGSCC here but there seems to be some missing infrastructure to support backend defined ones.
0fa3fea
to
25f1d7e
Compare
This was much more difficult than I anticipated. The pass is
not in a good state, with poor test coverage. The legacy PM
does seem to be relying on maintaining the map state between
different SCCs, which seems bad. The pass is going out of its
way to avoid putting the attributes it introduces onto non-callee
functions. If it just added them, we could use them directly
instead of relying on the map, I would think.
The NewPM path uses a ModulePass; I'm not sure if we should be
using CGSCC here but there seems to be some missing infrastructure
to support backend defined ones.