Skip to content

Commit 0fa3fea

Browse files
committed
NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager
This was much more difficult than I anticipated. The pass is not in a good state, with poor test coverage. The legacy PM does seem to be relying on maintaining the map state between different SCCs, which seems bad. The pass is going out of its way to avoid putting the attributes it introduces onto non-callee functions. If it just added them, we could use them directly instead of relying on the map, I would think. The NewPM path uses a ModulePass; I'm not sure if we should be using CGSCC here but there seems to be some missing infrastructure to support backend defined ones.
1 parent a9c57cd commit 0fa3fea

File tree

7 files changed

+136
-50
lines changed

7 files changed

+136
-50
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,8 @@ extern char &SIPreAllocateWWMRegsID;
209209
void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
210210
extern char &AMDGPUImageIntrinsicOptimizerID;
211211

212-
void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
213-
extern char &AMDGPUPerfHintAnalysisID;
212+
void initializeAMDGPUPerfHintAnalysisLegacyPass(PassRegistry &);
213+
extern char &AMDGPUPerfHintAnalysisLegacyID;
214214

215215
void initializeGCNRegPressurePrinterPass(PassRegistry &);
216216
extern char &GCNRegPressurePrinterID;

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel",
102102
"AMDGPU DAG->DAG Pattern Instruction Selection", false,
103103
false)
104104
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
105-
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
105+
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysisLegacy)
106106
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
107107
#ifdef EXPENSIVE_CHECKS
108108
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)

llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ MODULE_PASS("amdgpu-lower-buffer-fat-pointers",
2222
AMDGPULowerBufferFatPointersPass(*this))
2323
MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass())
2424
MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this))
25+
MODULE_PASS("amdgpu-perf-hint",
26+
AMDGPUPerfHintAnalysisPass(
27+
*static_cast<const GCNTargetMachine *>(this)))
2528
MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass())
2629
MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass())
2730
#undef MODULE_PASS

llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp

Lines changed: 87 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,15 @@
1212
///
1313
//===----------------------------------------------------------------------===//
1414

15-
#include "AMDGPU.h"
1615
#include "AMDGPUPerfHintAnalysis.h"
16+
#include "AMDGPU.h"
17+
#include "AMDGPUTargetMachine.h"
1718
#include "Utils/AMDGPUBaseInfo.h"
1819
#include "llvm/ADT/SmallSet.h"
1920
#include "llvm/ADT/Statistic.h"
2021
#include "llvm/Analysis/CallGraph.h"
22+
#include "llvm/Analysis/CallGraphSCCPass.h"
23+
#include "llvm/Analysis/LazyCallGraph.h"
2124
#include "llvm/Analysis/ValueTracking.h"
2225
#include "llvm/CodeGen/TargetLowering.h"
2326
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -54,20 +57,14 @@ static cl::opt<unsigned>
5457
STATISTIC(NumMemBound, "Number of functions marked as memory bound");
5558
STATISTIC(NumLimitWave, "Number of functions marked as needing limit wave");
5659

57-
char llvm::AMDGPUPerfHintAnalysis::ID = 0;
58-
char &llvm::AMDGPUPerfHintAnalysisID = AMDGPUPerfHintAnalysis::ID;
59-
60-
INITIALIZE_PASS(AMDGPUPerfHintAnalysis, DEBUG_TYPE,
61-
"Analysis if a function is memory bound", true, true)
62-
6360
namespace {
6461

6562
struct AMDGPUPerfHint {
6663
friend AMDGPUPerfHintAnalysis;
6764

6865
public:
6966
AMDGPUPerfHint(AMDGPUPerfHintAnalysis::FuncInfoMap &FIM_,
70-
const TargetLowering *TLI_)
67+
const SITargetLowering *TLI_)
7168
: FIM(FIM_), TLI(TLI_) {}
7269

7370
bool runOnFunction(Function &F);
@@ -97,7 +94,7 @@ struct AMDGPUPerfHint {
9794

9895
const DataLayout *DL = nullptr;
9996

100-
const TargetLowering *TLI;
97+
const SITargetLowering *TLI;
10198

10299
AMDGPUPerfHintAnalysis::FuncInfo *visit(const Function &F);
103100
static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);
@@ -388,23 +385,52 @@ bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
388385
<< Reference.print() << "Result:" << Result << '\n');
389386
return Result;
390387
}
388+
389+
class AMDGPUPerfHintAnalysisLegacy : public CallGraphSCCPass {
390+
private:
391+
// FIXME: This is relying on maintaining state between different SCCs.
392+
AMDGPUPerfHintAnalysis Impl;
393+
394+
public:
395+
static char ID;
396+
397+
AMDGPUPerfHintAnalysisLegacy() : CallGraphSCCPass(ID) {}
398+
399+
bool runOnSCC(CallGraphSCC &SCC) override;
400+
401+
void getAnalysisUsage(AnalysisUsage &AU) const override {
402+
AU.setPreservesAll();
403+
}
404+
};
405+
391406
} // namespace
392407

393-
bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
394-
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
395-
if (!TPC)
408+
bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
409+
auto FI = FIM.find(F);
410+
if (FI == FIM.end())
396411
return false;
397412

398-
const TargetMachine &TM = TPC->getTM<TargetMachine>();
413+
return AMDGPUPerfHint::isMemBound(FI->second);
414+
}
415+
416+
bool AMDGPUPerfHintAnalysis::needsWaveLimiter(const Function *F) const {
417+
auto FI = FIM.find(F);
418+
if (FI == FIM.end())
419+
return false;
399420

421+
return AMDGPUPerfHint::needLimitWave(FI->second);
422+
}
423+
424+
bool AMDGPUPerfHintAnalysis::runOnSCC(const GCNTargetMachine &TM,
425+
CallGraphSCC &SCC) {
400426
bool Changed = false;
401427
for (CallGraphNode *I : SCC) {
402428
Function *F = I->getFunction();
403429
if (!F || F->isDeclaration())
404430
continue;
405431

406-
const TargetSubtargetInfo *ST = TM.getSubtargetImpl(*F);
407-
AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());
432+
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
433+
AMDGPUPerfHint Analyzer(FIM, ST.getTargetLowering());
408434

409435
if (Analyzer.runOnFunction(*F))
410436
Changed = true;
@@ -413,18 +439,55 @@ bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
413439
return Changed;
414440
}
415441

416-
bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
417-
auto FI = FIM.find(F);
418-
if (FI == FIM.end())
419-
return false;
442+
bool AMDGPUPerfHintAnalysis::run(const GCNTargetMachine &TM,
443+
LazyCallGraph &CG) {
444+
bool Changed = false;
420445

421-
return AMDGPUPerfHint::isMemBound(FI->second);
446+
CG.buildRefSCCs();
447+
448+
for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
449+
for (LazyCallGraph::SCC &SCC : RC) {
450+
if (SCC.size() != 1)
451+
continue;
452+
Function &F = SCC.begin()->getFunction();
453+
// TODO: Skip without norecurse, or interposable?
454+
if (F.isDeclaration())
455+
continue;
456+
457+
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
458+
AMDGPUPerfHint Analyzer(FIM, ST.getTargetLowering());
459+
if (Analyzer.runOnFunction(F))
460+
Changed = true;
461+
}
462+
}
463+
464+
return Changed;
422465
}
423466

424-
bool AMDGPUPerfHintAnalysis::needsWaveLimiter(const Function *F) const {
425-
auto FI = FIM.find(F);
426-
if (FI == FIM.end())
467+
char AMDGPUPerfHintAnalysisLegacy::ID = 0;
468+
char &llvm::AMDGPUPerfHintAnalysisLegacyID = AMDGPUPerfHintAnalysisLegacy::ID;
469+
470+
INITIALIZE_PASS(AMDGPUPerfHintAnalysisLegacy, DEBUG_TYPE,
471+
"Analysis if a function is memory bound", true, true)
472+
473+
bool AMDGPUPerfHintAnalysisLegacy::runOnSCC(CallGraphSCC &SCC) {
474+
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
475+
if (!TPC)
427476
return false;
428477

429-
return AMDGPUPerfHint::needLimitWave(FI->second);
478+
const GCNTargetMachine &TM = TPC->getTM<GCNTargetMachine>();
479+
return Impl.runOnSCC(TM, SCC);
480+
}
481+
482+
PreservedAnalyses AMDGPUPerfHintAnalysisPass::run(Module &M,
483+
ModuleAnalysisManager &AM) {
484+
auto &CG = AM.getResult<LazyCallGraphAnalysis>(M);
485+
486+
bool Changed = Impl->run(TM, CG);
487+
if (!Changed)
488+
return PreservedAnalyses::all();
489+
490+
PreservedAnalyses PA;
491+
PA.preserve<LazyCallGraphAnalysis>();
492+
return PA;
430493
}

llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h

Lines changed: 40 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,47 +12,65 @@
1212
///
1313
//===----------------------------------------------------------------------===//
1414

15-
#ifndef LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
16-
#define LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
15+
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPERFHINTANALYSIS_H
16+
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPERFHINTANALYSIS_H
1717

18-
#include "llvm/Analysis/CallGraphSCCPass.h"
18+
#include "llvm/IR/PassManager.h"
1919
#include "llvm/IR/ValueMap.h"
2020

21+
#include "llvm/Analysis/CGSCCPassManager.h"
22+
#include "llvm/Analysis/LazyCallGraph.h"
23+
2124
namespace llvm {
2225

23-
struct AMDGPUPerfHintAnalysis : public CallGraphSCCPass {
24-
static char ID;
26+
class AMDGPUPerfHintAnalysis;
27+
class CallGraphSCC;
28+
class GCNTargetMachine;
29+
class LazyCallGraph;
2530

31+
class AMDGPUPerfHintAnalysis {
2632
public:
27-
AMDGPUPerfHintAnalysis() : CallGraphSCCPass(ID) {}
28-
29-
bool runOnSCC(CallGraphSCC &SCC) override;
30-
31-
void getAnalysisUsage(AnalysisUsage &AU) const override {
32-
AU.setPreservesAll();
33-
}
34-
35-
bool isMemoryBound(const Function *F) const;
36-
37-
bool needsWaveLimiter(const Function *F) const;
38-
3933
struct FuncInfo {
4034
unsigned MemInstCost;
4135
unsigned InstCost;
42-
unsigned IAMInstCost; // Indirect access memory instruction count
43-
unsigned LSMInstCost; // Large stride memory instruction count
36+
unsigned IAMInstCost; // Indirect access memory instruction count
37+
unsigned LSMInstCost; // Large stride memory instruction count
4438
bool HasDenseGlobalMemAcc; // Set if at least 1 basic block has relatively
4539
// high global memory access
4640
FuncInfo()
4741
: MemInstCost(0), InstCost(0), IAMInstCost(0), LSMInstCost(0),
4842
HasDenseGlobalMemAcc(false) {}
4943
};
5044

51-
typedef ValueMap<const Function*, FuncInfo> FuncInfoMap;
45+
typedef ValueMap<const Function *, FuncInfo> FuncInfoMap;
5246

5347
private:
54-
5548
FuncInfoMap FIM;
49+
50+
public:
51+
AMDGPUPerfHintAnalysis() {}
52+
53+
// OldPM
54+
bool runOnSCC(const GCNTargetMachine &TM, CallGraphSCC &SCC);
55+
56+
// NewPM
57+
bool run(const GCNTargetMachine &TM, LazyCallGraph &CG);
58+
59+
bool isMemoryBound(const Function *F) const;
60+
61+
bool needsWaveLimiter(const Function *F) const;
5662
};
63+
64+
struct AMDGPUPerfHintAnalysisPass
65+
: public PassInfoMixin<AMDGPUPerfHintAnalysisPass> {
66+
const GCNTargetMachine &TM;
67+
std::unique_ptr<AMDGPUPerfHintAnalysis> Impl;
68+
69+
AMDGPUPerfHintAnalysisPass(const GCNTargetMachine &TM)
70+
: TM(TM), Impl(std::make_unique<AMDGPUPerfHintAnalysis>()) {}
71+
72+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
73+
};
74+
5775
} // namespace llvm
58-
#endif // LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
76+
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUPERFHINTANALYSIS_H

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "AMDGPUIGroupLP.h"
2222
#include "AMDGPUISelDAGToDAG.h"
2323
#include "AMDGPUMacroFusion.h"
24+
#include "AMDGPUPerfHintAnalysis.h"
2425
#include "AMDGPURegBankSelect.h"
2526
#include "AMDGPUSplitModule.h"
2627
#include "AMDGPUTargetObjectFile.h"
@@ -1229,7 +1230,7 @@ bool GCNPassConfig::addPreISel() {
12291230
addPass(createLCSSAPass());
12301231

12311232
if (TM->getOptLevel() > CodeGenOptLevel::Less)
1232-
addPass(&AMDGPUPerfHintAnalysisID);
1233+
addPass(&AMDGPUPerfHintAnalysisLegacyID);
12331234

12341235
return false;
12351236
}

llvm/test/CodeGen/AMDGPU/perfhint.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
22
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-perf-hint < %s | FileCheck -check-prefix=CHECK %s
3+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-perf-hint < %s | FileCheck -check-prefix=CHECK %s
34
; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s
45

56
; GCN-LABEL: {{^}}test_membound:

0 commit comments

Comments
 (0)