Skip to content

NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager #102645

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,8 @@ extern char &SIPreAllocateWWMRegsID;
void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
extern char &AMDGPUImageIntrinsicOptimizerID;

void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
extern char &AMDGPUPerfHintAnalysisID;
void initializeAMDGPUPerfHintAnalysisLegacyPass(PassRegistry &);
extern char &AMDGPUPerfHintAnalysisLegacyID;

void initializeGCNRegPressurePrinterPass(PassRegistry &);
extern char &GCNRegPressurePrinterID;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel",
"AMDGPU DAG->DAG Pattern Instruction Selection", false,
false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysisLegacy)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
#ifdef EXPENSIVE_CHECKS
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ MODULE_PASS("amdgpu-lower-buffer-fat-pointers",
AMDGPULowerBufferFatPointersPass(*this))
MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass())
MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this))
MODULE_PASS("amdgpu-perf-hint",
AMDGPUPerfHintAnalysisPass(
*static_cast<const GCNTargetMachine *>(this)))
MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass())
MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass())
#undef MODULE_PASS
Expand Down
111 changes: 87 additions & 24 deletions llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,15 @@
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
Expand Down Expand Up @@ -54,20 +57,14 @@ static cl::opt<unsigned>
STATISTIC(NumMemBound, "Number of functions marked as memory bound");
STATISTIC(NumLimitWave, "Number of functions marked as needing limit wave");

char llvm::AMDGPUPerfHintAnalysis::ID = 0;
char &llvm::AMDGPUPerfHintAnalysisID = AMDGPUPerfHintAnalysis::ID;

INITIALIZE_PASS(AMDGPUPerfHintAnalysis, DEBUG_TYPE,
"Analysis if a function is memory bound", true, true)

namespace {

struct AMDGPUPerfHint {
friend AMDGPUPerfHintAnalysis;

public:
AMDGPUPerfHint(AMDGPUPerfHintAnalysis::FuncInfoMap &FIM_,
const TargetLowering *TLI_)
const SITargetLowering *TLI_)
: FIM(FIM_), TLI(TLI_) {}

bool runOnFunction(Function &F);
Expand Down Expand Up @@ -97,7 +94,7 @@ struct AMDGPUPerfHint {

const DataLayout *DL = nullptr;

const TargetLowering *TLI;
const SITargetLowering *TLI;

AMDGPUPerfHintAnalysis::FuncInfo *visit(const Function &F);
static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);
Expand Down Expand Up @@ -388,23 +385,52 @@ bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
<< Reference.print() << "Result:" << Result << '\n');
return Result;
}

/// Legacy pass manager wrapper around AMDGPUPerfHintAnalysis.
///
/// Derives from CallGraphSCCPass and holds the analysis implementation as a
/// member; the pass itself declares that it preserves all analyses.
class AMDGPUPerfHintAnalysisLegacy : public CallGraphSCCPass {
public:
  static char ID;

  AMDGPUPerfHintAnalysisLegacy() : CallGraphSCCPass(ID) {}

  /// Nothing is invalidated by this pass.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
  }

  /// Per-SCC entry point invoked by the legacy call graph pass manager.
  bool runOnSCC(CallGraphSCC &SCC) override;

private:
  // FIXME: This is relying on maintaining state between different SCCs.
  AMDGPUPerfHintAnalysis Impl;
};

} // namespace

bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
if (!TPC)
/// Query whether \p F has been classified as memory bound.
///
/// \param F function to look up in the per-function info map.
/// \returns false when no entry has been recorded for \p F; otherwise the
/// result of AMDGPUPerfHint::isMemBound on the recorded entry.
bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
  auto FI = FIM.find(F);
  if (FI == FIM.end())
    return false;

  return AMDGPUPerfHint::isMemBound(FI->second);
}

bool AMDGPUPerfHintAnalysis::needsWaveLimiter(const Function *F) const {
auto FI = FIM.find(F);
if (FI == FIM.end())
return false;

return AMDGPUPerfHint::needLimitWave(FI->second);
}

bool AMDGPUPerfHintAnalysis::runOnSCC(const GCNTargetMachine &TM,
CallGraphSCC &SCC) {
bool Changed = false;
for (CallGraphNode *I : SCC) {
Function *F = I->getFunction();
if (!F || F->isDeclaration())
continue;

const TargetSubtargetInfo *ST = TM.getSubtargetImpl(*F);
AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
AMDGPUPerfHint Analyzer(FIM, ST.getTargetLowering());

if (Analyzer.runOnFunction(*F))
Changed = true;
Expand All @@ -413,18 +439,55 @@ bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
return Changed;
}

bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
auto FI = FIM.find(F);
if (FI == FIM.end())
return false;
/// New pass manager entry point: run the per-function analysis over the
/// functions of a lazy call graph.
///
/// RefSCCs are built and visited in post-order. Within each RefSCC only SCCs
/// that contain exactly one function are analyzed; larger SCCs and function
/// declarations are skipped.
///
/// \param TM target machine used to obtain the per-function GCNSubtarget.
/// \param CG lazy call graph to traverse.
/// \returns true if AMDGPUPerfHint::runOnFunction reported a change for at
/// least one function.
bool AMDGPUPerfHintAnalysis::run(const GCNTargetMachine &TM,
                                 LazyCallGraph &CG) {
  bool Changed = false;

  CG.buildRefSCCs();

  for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
    for (LazyCallGraph::SCC &SCC : RC) {
      if (SCC.size() != 1)
        continue;
      Function &F = SCC.begin()->getFunction();
      // TODO: Skip without norecurse, or interposable?
      if (F.isDeclaration())
        continue;

      // The target lowering comes from the subtarget selected for this
      // particular function.
      const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
      AMDGPUPerfHint Analyzer(FIM, ST.getTargetLowering());
      if (Analyzer.runOnFunction(F))
        Changed = true;
    }
  }

  return Changed;
}

bool AMDGPUPerfHintAnalysis::needsWaveLimiter(const Function *F) const {
auto FI = FIM.find(F);
if (FI == FIM.end())
char AMDGPUPerfHintAnalysisLegacy::ID = 0;
char &llvm::AMDGPUPerfHintAnalysisLegacyID = AMDGPUPerfHintAnalysisLegacy::ID;

INITIALIZE_PASS(AMDGPUPerfHintAnalysisLegacy, DEBUG_TYPE,
"Analysis if a function is memory bound", true, true)

/// Legacy pass manager entry point for one call graph SCC.
///
/// Does nothing and reports no change when no TargetPassConfig is available;
/// otherwise forwards the SCC to the shared implementation together with the
/// GCNTargetMachine obtained from the pass config.
bool AMDGPUPerfHintAnalysisLegacy::runOnSCC(CallGraphSCC &SCC) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  const GCNTargetMachine &TM = TPC->getTM<GCNTargetMachine>();
  return Impl.runOnSCC(TM, SCC);
}

/// New pass manager module entry point.
///
/// Fetches the module's LazyCallGraphAnalysis result and hands it to the
/// analysis implementation. When the implementation reports a change, only
/// LazyCallGraphAnalysis is marked preserved; otherwise everything is.
PreservedAnalyses AMDGPUPerfHintAnalysisPass::run(Module &M,
                                                  ModuleAnalysisManager &AM) {
  auto &Graph = AM.getResult<LazyCallGraphAnalysis>(M);

  if (Impl->run(TM, Graph)) {
    PreservedAnalyses Preserved;
    Preserved.preserve<LazyCallGraphAnalysis>();
    return Preserved;
  }

  return PreservedAnalyses::all();
}
62 changes: 40 additions & 22 deletions llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,47 +12,65 @@
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
#define LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPERFHINTANALYSIS_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPERFHINTANALYSIS_H

#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueMap.h"

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LazyCallGraph.h"

namespace llvm {

struct AMDGPUPerfHintAnalysis : public CallGraphSCCPass {
static char ID;
class AMDGPUPerfHintAnalysis;
class CallGraphSCC;
class GCNTargetMachine;
class LazyCallGraph;

class AMDGPUPerfHintAnalysis {
public:
AMDGPUPerfHintAnalysis() : CallGraphSCCPass(ID) {}

bool runOnSCC(CallGraphSCC &SCC) override;

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}

bool isMemoryBound(const Function *F) const;

bool needsWaveLimiter(const Function *F) const;

struct FuncInfo {
unsigned MemInstCost;
unsigned InstCost;
unsigned IAMInstCost; // Indirect access memory instruction count
unsigned LSMInstCost; // Large stride memory instruction count
unsigned IAMInstCost; // Indirect access memory instruction count
unsigned LSMInstCost; // Large stride memory instruction count
bool HasDenseGlobalMemAcc; // Set if at least 1 basic block has relatively
// high global memory access
FuncInfo()
: MemInstCost(0), InstCost(0), IAMInstCost(0), LSMInstCost(0),
HasDenseGlobalMemAcc(false) {}
};

typedef ValueMap<const Function*, FuncInfo> FuncInfoMap;
typedef ValueMap<const Function *, FuncInfo> FuncInfoMap;

private:

FuncInfoMap FIM;

public:
AMDGPUPerfHintAnalysis() {}

// OldPM
bool runOnSCC(const GCNTargetMachine &TM, CallGraphSCC &SCC);

// NewPM
bool run(const GCNTargetMachine &TM, LazyCallGraph &CG);

bool isMemoryBound(const Function *F) const;

bool needsWaveLimiter(const Function *F) const;
};

/// Module pass for the new pass manager that drives AMDGPUPerfHintAnalysis.
///
/// Keeps a reference to the target machine it was constructed with and owns
/// a freshly created analysis implementation.
struct AMDGPUPerfHintAnalysisPass
    : public PassInfoMixin<AMDGPUPerfHintAnalysisPass> {
  const GCNTargetMachine &TM;

  // Allocated once per pass object, at construction time.
  std::unique_ptr<AMDGPUPerfHintAnalysis> Impl =
      std::make_unique<AMDGPUPerfHintAnalysis>();

  AMDGPUPerfHintAnalysisPass(const GCNTargetMachine &TM) : TM(TM) {}

  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};

} // namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUPERFHINTANALYSIS_H
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "AMDGPUIGroupLP.h"
#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPUMacroFusion.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegBankSelect.h"
#include "AMDGPUSplitModule.h"
#include "AMDGPUTargetObjectFile.h"
Expand Down Expand Up @@ -1249,7 +1250,7 @@ bool GCNPassConfig::addPreISel() {
addPass(createLCSSAPass());

if (TM->getOptLevel() > CodeGenOptLevel::Less)
addPass(&AMDGPUPerfHintAnalysisID);
addPass(&AMDGPUPerfHintAnalysisLegacyID);

return false;
}
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/perfhint.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-perf-hint < %s | FileCheck -check-prefix=CHECK %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-perf-hint < %s | FileCheck -check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s

; GCN-LABEL: {{^}}test_membound:
Expand Down
Loading