Skip to content

Commit 0818ab4

Browse files
committed
[AMDGPU][NPM] Port SIInsertWaitcnts to NPM
1 parent f80cce0 commit 0818ab4

File tree

7 files changed

+76
-34
lines changed

7 files changed

+76
-34
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,13 @@ class SIMemoryLegalizerPass : public PassInfoMixin<SIMemoryLegalizerPass> {
371371
static bool isRequired() { return true; }
372372
};
373373

374+
/// New-pass-manager (NPM) wrapper for the SI insert-waitcnts machine pass.
/// Only the declaration lives in this header; run() is defined in
/// SIInsertWaitcnts.cpp.
class SIInsertWaitcntsPass : public PassInfoMixin<SIInsertWaitcntsPass> {
375+
public:
376+
/// Runs the pass over \p MF, pulling the analyses it needs from \p MFAM,
/// and returns the set of analyses that are still valid afterwards.
PreservedAnalyses run(MachineFunction &MF,
377+
MachineFunctionAnalysisManager &MFAM);
378+
/// Always true: the pass declares itself required.
static bool isRequired() { return true; }
379+
};
380+
374381
FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
375382

376383
ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -447,7 +454,7 @@ extern char &AMDGPUInsertDelayAluID;
447454
void initializeSIInsertHardClausesPass(PassRegistry &);
448455
extern char &SIInsertHardClausesID;
449456

450-
void initializeSIInsertWaitcntsPass(PassRegistry&);
457+
void initializeSIInsertWaitcntsLegacyPass(PassRegistry &);
451458
extern char &SIInsertWaitcntsID;
452459

453460
void initializeSIFormMemoryClausesLegacyPass(PassRegistry &);

llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", SIFixVGPRCopiesPass())
110110
MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
111111
MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass())
112112
MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
113+
MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
113114
MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
114115
MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
115116
MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
@@ -132,7 +133,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartial
132133
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
133134

134135
DUMMY_MACHINE_FUNCTION_PASS("si-insert-hard-clauses", SIInsertHardClausesPass())
135-
DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
136136
DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass())
137137
DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
138138
// TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
536536
initializeSIAnnotateControlFlowLegacyPass(*PR);
537537
initializeAMDGPUInsertDelayAluLegacyPass(*PR);
538538
initializeSIInsertHardClausesPass(*PR);
539-
initializeSIInsertWaitcntsPass(*PR);
539+
initializeSIInsertWaitcntsLegacyPass(*PR);
540540
initializeSIModeRegisterLegacyPass(*PR);
541541
initializeSIWholeQuadModeLegacyPass(*PR);
542542
initializeSILowerControlFlowLegacyPass(*PR);
@@ -2154,7 +2154,7 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
21542154
}
21552155

21562156
addPass(SIMemoryLegalizerPass());
2157-
// TODO: addPass(SIInsertWaitcntsPass());
2157+
addPass(SIInsertWaitcntsPass());
21582158

21592159
// TODO: addPass(SIModeRegisterPass());
21602160

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 61 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "llvm/ADT/Sequence.h"
3434
#include "llvm/Analysis/AliasAnalysis.h"
3535
#include "llvm/CodeGen/MachineLoopInfo.h"
36+
#include "llvm/CodeGen/MachinePassManager.h"
3637
#include "llvm/CodeGen/MachinePostDominators.h"
3738
#include "llvm/Support/DebugCounter.h"
3839
#include "llvm/TargetParser/TargetParser.h"
@@ -585,7 +586,7 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
585586
AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const override;
586587
};
587588

588-
class SIInsertWaitcnts : public MachineFunctionPass {
589+
class SIInsertWaitcnts {
589590
private:
590591
const GCNSubtarget *ST = nullptr;
591592
const SIInstrInfo *TII = nullptr;
@@ -624,9 +625,9 @@ class SIInsertWaitcnts : public MachineFunctionPass {
624625
InstCounterType MaxCounter = NUM_NORMAL_INST_CNTS;
625626

626627
public:
627-
static char ID;
628-
629-
SIInsertWaitcnts() : MachineFunctionPass(ID) {
628+
SIInsertWaitcnts(MachineLoopInfo *MLI, MachinePostDominatorTree *PDT,
629+
AliasAnalysis *AA)
630+
: MLI(MLI), PDT(PDT), AA(AA) {
630631
(void)ForceExpCounter;
631632
(void)ForceLgkmCounter;
632633
(void)ForceVMCounter;
@@ -636,20 +637,7 @@ class SIInsertWaitcnts : public MachineFunctionPass {
636637
bool isPreheaderToFlush(MachineBasicBlock &MBB,
637638
WaitcntBrackets &ScoreBrackets);
638639
bool isVMEMOrFlatVMEM(const MachineInstr &MI) const;
639-
bool runOnMachineFunction(MachineFunction &MF) override;
640-
641-
StringRef getPassName() const override {
642-
return "SI insert wait instructions";
643-
}
644-
645-
void getAnalysisUsage(AnalysisUsage &AU) const override {
646-
AU.setPreservesCFG();
647-
AU.addRequired<MachineLoopInfoWrapperPass>();
648-
AU.addRequired<MachinePostDominatorTreeWrapperPass>();
649-
AU.addUsedIfAvailable<AAResultsWrapperPass>();
650-
AU.addPreserved<AAResultsWrapperPass>();
651-
MachineFunctionPass::getAnalysisUsage(AU);
652-
}
640+
bool run(MachineFunction &MF);
653641

654642
bool isForceEmitWaitcnt() const {
655643
for (auto T : inst_counter_types())
@@ -733,6 +721,36 @@ class SIInsertWaitcnts : public MachineFunctionPass {
733721
WaitcntBrackets &ScoreBrackets);
734722
};
735723

724+
/// Legacy pass-manager wrapper around SIInsertWaitcnts. It gathers the
/// analyses through the legacy getAnalysis interfaces and delegates the
/// actual work to a temporary SIInsertWaitcnts object.
class SIInsertWaitcntsLegacy : public MachineFunctionPass {
725+
public:
726+
// Pass identifier; defined out of line and aliased by llvm::SIInsertWaitcntsID.
static char ID;
727+
SIInsertWaitcntsLegacy() : MachineFunctionPass(ID) {}
728+
729+
/// Collects loop info and the post-dominator tree (both declared required
/// in getAnalysisUsage) plus alias analysis when it is available, then
/// returns whatever SIInsertWaitcnts::run reports for \p MF.
bool runOnMachineFunction(MachineFunction &MF) override {
730+
auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
731+
auto *PDT =
732+
&getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
733+
// Alias analysis is optional: AA stays null when the wrapper pass is absent.
AliasAnalysis *AA = nullptr;
734+
if (auto *AAR = getAnalysisIfAvailable<AAResultsWrapperPass>())
735+
AA = &AAR->getAAResults();
736+
737+
return SIInsertWaitcnts(MLI, PDT, AA).run(MF);
738+
}
739+
740+
/// Human-readable name of the pass.
StringRef getPassName() const override {
741+
return "SI insert wait instructions";
742+
}
743+
744+
/// Declares that the pass preserves the CFG, requires machine loop info and
/// the machine post-dominator tree, and uses (and preserves) alias analysis
/// results if they happen to be available.
void getAnalysisUsage(AnalysisUsage &AU) const override {
745+
AU.setPreservesCFG();
746+
AU.addRequired<MachineLoopInfoWrapperPass>();
747+
AU.addRequired<MachinePostDominatorTreeWrapperPass>();
748+
AU.addUsedIfAvailable<AAResultsWrapperPass>();
749+
AU.addPreserved<AAResultsWrapperPass>();
750+
MachineFunctionPass::getAnalysisUsage(AU);
751+
}
752+
};
753+
736754
} // end anonymous namespace
737755

738756
RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
@@ -1112,19 +1130,19 @@ bool WaitcntBrackets::counterOutOfOrder(InstCounterType T) const {
11121130
return hasMixedPendingEvents(T);
11131131
}
11141132

1115-
INITIALIZE_PASS_BEGIN(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false,
1116-
false)
1133+
INITIALIZE_PASS_BEGIN(SIInsertWaitcntsLegacy, DEBUG_TYPE, "SI Insert Waitcnts",
1134+
false, false)
11171135
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
11181136
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
1119-
INITIALIZE_PASS_END(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false,
1120-
false)
1137+
INITIALIZE_PASS_END(SIInsertWaitcntsLegacy, DEBUG_TYPE, "SI Insert Waitcnts",
1138+
false, false)
11211139

1122-
char SIInsertWaitcnts::ID = 0;
1140+
char SIInsertWaitcntsLegacy::ID = 0;
11231141

1124-
char &llvm::SIInsertWaitcntsID = SIInsertWaitcnts::ID;
1142+
char &llvm::SIInsertWaitcntsID = SIInsertWaitcntsLegacy::ID;
11251143

11261144
/// Factory returning a newly allocated pass instance as a FunctionPass*.
/// In this diff the removed ('-') line allocated SIInsertWaitcnts and the
/// added ('+') line allocates SIInsertWaitcntsLegacy instead.
FunctionPass *llvm::createSIInsertWaitcntsPass() {
1127-
return new SIInsertWaitcnts();
1145+
return new SIInsertWaitcntsLegacy();
11281146
}
11291147

11301148
static bool updateOperandIfDifferent(MachineInstr &MI, AMDGPU::OpName OpName,
@@ -2393,16 +2411,29 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
23932411
return HasVMemLoad && UsesVgprLoadedOutside && ST->hasVmemWriteVgprInOrder();
23942412
}
23952413

2396-
bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
2414+
/// New-pass-manager entry point: fetches the analyses from \p MFAM, runs
/// SIInsertWaitcnts on \p MF, and reports which analyses remain valid.
/// Returns PreservedAnalyses::all() when SIInsertWaitcnts::run returns false;
/// otherwise returns the machine-function-pass default set with the CFG
/// analyses and AAManager additionally marked preserved.
PreservedAnalyses
2415+
SIInsertWaitcntsPass::run(MachineFunction &MF,
2416+
MachineFunctionAnalysisManager &MFAM) {
2417+
auto *MLI = &MFAM.getResult<MachineLoopAnalysis>(MF);
2418+
auto *PDT = &MFAM.getResult<MachinePostDominatorTreeAnalysis>(MF);
2419+
// Alias analysis is looked up through the function-level analysis manager
// using getCachedResult rather than getResult.
// NOTE(review): presumably AA is null when no AAManager result is cached,
// mirroring the optional AA in the legacy wrapper -- confirm.
auto *AA = MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(MF)
2420+
.getManager()
2421+
.getCachedResult<AAManager>(MF.getFunction());
2422+
2423+
// A false return from SIInsertWaitcnts::run leaves every analysis preserved.
if (!SIInsertWaitcnts(MLI, PDT, AA).run(MF))
2424+
return PreservedAnalyses::all();
2425+
2426+
// Counterpart of setPreservesCFG() / addPreserved<AAResultsWrapperPass>()
// in the legacy wrapper's getAnalysisUsage.
return getMachineFunctionPassPreservedAnalyses()
2427+
.preserveSet<CFGAnalyses>()
2428+
.preserve<AAManager>();
2429+
}
2430+
2431+
bool SIInsertWaitcnts::run(MachineFunction &MF) {
23972432
ST = &MF.getSubtarget<GCNSubtarget>();
23982433
TII = ST->getInstrInfo();
23992434
TRI = &TII->getRegisterInfo();
24002435
MRI = &MF.getRegInfo();
24012436
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
2402-
MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
2403-
PDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
2404-
if (auto *AAR = getAnalysisIfAvailable<AAResultsWrapperPass>())
2405-
AA = &AAR->getAAResults();
24062437

24072438
AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST->getCPU());
24082439

llvm/test/CodeGen/AMDGPU/call-waw-waitcnt.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-insert-waitcnts %s -o - | FileCheck -check-prefix=GCN %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -passes=si-insert-waitcnts %s -o - | FileCheck -check-prefix=GCN %s
34

45
# $sgpr30_sgpr31 will hold the return address. We need a waitcnt before SI_CALL so
56
# that the return address is not clobbered in the callee by the outstanding load.

llvm/test/CodeGen/AMDGPU/insert-waitcnts-hang.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
22
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass si-insert-waitcnts %s -o - | FileCheck %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes si-insert-waitcnts %s -o - | FileCheck %s
34

45
---
56
name: test

llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
# RUN: llc -run-pass=si-insert-waitcnts -mtriple=amdgcn -mcpu=gfx900 -o - %s | FileCheck %s -check-prefixes=CHECK,GFX9
33
# RUN: llc -run-pass=si-insert-waitcnts -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -o - %s | FileCheck %s
44
# RUN: llc -run-pass=si-insert-waitcnts -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -o - %s | FileCheck %s
5+
6+
# RUN: llc -passes=si-insert-waitcnts -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -o - %s | FileCheck %s
57
---
68
# CHECK-LABEL: name: vccz_corrupt_workaround
79
# CHECK: $vcc = V_CMP_EQ_F32

0 commit comments

Comments
 (0)