-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU][NPM] Port AMDGPUMarkLastScratchLoad to NPM #131738
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][NPM] Port AMDGPUMarkLastScratchLoad to NPM #131738
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Akshat Oke (optimisan) Changes: This finishes all passes for the optimized regalloc path. Full diff: https://github.com/llvm/llvm-project/pull/131738.diff 5 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index f5c2b09c84806..eebc33aea7a86 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -195,7 +195,7 @@ extern char &AMDGPURegBankSelectID;
void initializeAMDGPURegBankLegalizePass(PassRegistry &);
extern char &AMDGPURegBankLegalizeID;
-void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &);
+void initializeAMDGPUMarkLastScratchLoadLegacyPass(PassRegistry &);
extern char &AMDGPUMarkLastScratchLoadID;
void initializeSILowerSGPRSpillsLegacyPass(PassRegistry &);
@@ -371,6 +371,13 @@ class GCNCreateVOPDPass : public PassInfoMixin<GCNCreateVOPDPass> {
MachineFunctionAnalysisManager &AM);
};
+class AMDGPUMarkLastScratchLoadPass
+ : public PassInfoMixin<AMDGPUMarkLastScratchLoadPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &AM);
+};
+
FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
ModulePass *createAMDGPUPrintfRuntimeBinding();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
index ba35a1d417173..218b2bff6bd56 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
@@ -25,18 +25,26 @@ using namespace llvm;
namespace {
-class AMDGPUMarkLastScratchLoad : public MachineFunctionPass {
+class AMDGPUMarkLastScratchLoad {
private:
LiveStacks *LS = nullptr;
LiveIntervals *LIS = nullptr;
SlotIndexes *SI = nullptr;
const SIInstrInfo *SII = nullptr;
+public:
+ AMDGPUMarkLastScratchLoad(LiveStacks *LS, LiveIntervals *LIS, SlotIndexes *SI)
+ : LS(LS), LIS(LIS), SI(SI) {}
+ bool run(MachineFunction &MF);
+};
+
+class AMDGPUMarkLastScratchLoadLegacy : public MachineFunctionPass {
public:
static char ID;
- AMDGPUMarkLastScratchLoad() : MachineFunctionPass(ID) {
- initializeAMDGPUMarkLastScratchLoadPass(*PassRegistry::getPassRegistry());
+ AMDGPUMarkLastScratchLoadLegacy() : MachineFunctionPass(ID) {
+ initializeAMDGPUMarkLastScratchLoadLegacyPass(
+ *PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -56,17 +64,34 @@ class AMDGPUMarkLastScratchLoad : public MachineFunctionPass {
} // end anonymous namespace
-bool AMDGPUMarkLastScratchLoad::runOnMachineFunction(MachineFunction &MF) {
+bool AMDGPUMarkLastScratchLoadLegacy::runOnMachineFunction(
+ MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
+ auto *LS = &getAnalysis<LiveStacksWrapperLegacy>().getLS();
+ auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+ auto *SI = &getAnalysis<SlotIndexesWrapperPass>().getSI();
+
+ return AMDGPUMarkLastScratchLoad(LS, LIS, SI).run(MF);
+}
+
+PreservedAnalyses
+AMDGPUMarkLastScratchLoadPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ auto &LS = MFAM.getResult<LiveStacksAnalysis>(MF);
+ auto &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
+ auto &SI = MFAM.getResult<SlotIndexesAnalysis>(MF);
+
+ AMDGPUMarkLastScratchLoad(&LS, &LIS, &SI).run(MF);
+ return PreservedAnalyses::all();
+}
+
+bool AMDGPUMarkLastScratchLoad::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
if (ST.getGeneration() < AMDGPUSubtarget::GFX12)
return false;
- LS = &getAnalysis<LiveStacksWrapperLegacy>().getLS();
- LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
- SI = &getAnalysis<SlotIndexesWrapperPass>().getSI();
SII = ST.getInstrInfo();
SlotIndexes &Slots = *LIS->getSlotIndexes();
@@ -130,13 +155,13 @@ bool AMDGPUMarkLastScratchLoad::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-char AMDGPUMarkLastScratchLoad::ID = 0;
+char AMDGPUMarkLastScratchLoadLegacy::ID = 0;
-char &llvm::AMDGPUMarkLastScratchLoadID = AMDGPUMarkLastScratchLoad::ID;
+char &llvm::AMDGPUMarkLastScratchLoadID = AMDGPUMarkLastScratchLoadLegacy::ID;
-INITIALIZE_PASS_BEGIN(AMDGPUMarkLastScratchLoad, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(AMDGPUMarkLastScratchLoadLegacy, DEBUG_TYPE,
"AMDGPU Mark last scratch load", false, false)
INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy)
-INITIALIZE_PASS_END(AMDGPUMarkLastScratchLoad, DEBUG_TYPE,
+INITIALIZE_PASS_END(AMDGPUMarkLastScratchLoadLegacy, DEBUG_TYPE,
"AMDGPU Mark last scratch load", false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 6832a17c37177..388c390edad6a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -98,6 +98,7 @@ FUNCTION_PASS_WITH_PARAMS(
#endif
MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", AMDGPUInsertDelayAluPass())
MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
+MACHINE_FUNCTION_PASS("amdgpu-mark-last-scratch-load", AMDGPUMarkLastScratchLoadPass())
MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", GCNPreRALongBranchRegPass())
MACHINE_FUNCTION_PASS("amdgpu-reserve-wwm-regs", AMDGPUReserveWWMRegsPass())
MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index d0454cce15756..827216f8fde59 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -497,7 +497,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPURegBankSelectPass(*PR);
initializeAMDGPURegBankLegalizePass(*PR);
initializeSILowerWWMCopiesLegacyPass(*PR);
- initializeAMDGPUMarkLastScratchLoadPass(*PR);
+ initializeAMDGPUMarkLastScratchLoadLegacyPass(*PR);
initializeSILowerSGPRSpillsLegacyPass(*PR);
initializeSIFixSGPRCopiesLegacyPass(*PR);
initializeSIFixVGPRCopiesLegacyPass(*PR);
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.mir b/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.mir
index cee45216968df..0a2222522b6a6 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -o - %s -run-pass=greedy -run-pass=amdgpu-mark-last-scratch-load -verify-machineinstrs | FileCheck -check-prefix=CHECK %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -o - %s -passes=greedy,amdgpu-mark-last-scratch-load | FileCheck -check-prefix=CHECK %s
--- |
define amdgpu_cs void @test_spill_12x32() "amdgpu-num-vgpr"="12" {
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -o - %s -run-pass=greedy -run-pass=amdgpu-mark-last-scratch-load -verify-machineinstrs | FileCheck -check-prefix=CHECK %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -o - %s -passes=greedy,amdgpu-mark-last-scratch-load | FileCheck -check-prefix=CHECK %s
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Technically this isn't running the right greedy, but the other line is doing the same.
Co-authored-by: Matt Arsenault <[email protected]>
This finishes all passes for the optimized regalloc path.