-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU][NPM] Port SILateBranchLowering to NPM #130063
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
optimisan
merged 2 commits into
main
from
users/optimisan/preemit/port-si-late-branch-lowering
Mar 26, 2025
Merged
[AMDGPU][NPM] Port SILateBranchLowering to NPM #130063
optimisan
merged 2 commits into
main
from
users/optimisan/preemit/port-si-late-branch-lowering
Mar 26, 2025
+46
−14
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This was referenced Mar 6, 2025
This was referenced Mar 6, 2025
88d2174
to
39768ea
Compare
5f050b8
to
33a5201
Compare
39768ea
to
79a590f
Compare
33a5201
to
70a3b58
Compare
@llvm/pr-subscribers-backend-amdgpu
Author: Akshat Oke (optimisan)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/130063.diff
6 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index b434676f85581..d1dc62e9cc526 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -213,7 +213,7 @@ extern char &SILowerControlFlowLegacyID;
void initializeSIPreEmitPeepholePass(PassRegistry &);
extern char &SIPreEmitPeepholeID;
-void initializeSILateBranchLoweringPass(PassRegistry &);
+void initializeSILateBranchLoweringLegacyPass(PassRegistry &);
extern char &SILateBranchLoweringPassID;
void initializeSIOptimizeExecMaskingLegacyPass(PassRegistry &);
@@ -384,6 +384,14 @@ class SIInsertHardClausesPass : public PassInfoMixin<SIInsertHardClausesPass> {
MachineFunctionAnalysisManager &MFAM);
};
+class SILateBranchLoweringPass
+ : public PassInfoMixin<SILateBranchLoweringPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+ static bool isRequired() { return true; }
+};
+
FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
ModulePass *createAMDGPUPrintfRuntimeBinding();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 3eabe087a8a33..318aad5590cda 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -111,6 +111,7 @@ MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass())
MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
MACHINE_FUNCTION_PASS("si-insert-hard-clauses", SIInsertHardClausesPass())
MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
+MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass())
MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
@@ -132,7 +133,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizations
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass())
DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
// TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
// already exists.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 6c24fe5f1441a..b9d62cc9e4b63 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -540,7 +540,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIWholeQuadModeLegacyPass(*PR);
initializeSILowerControlFlowLegacyPass(*PR);
initializeSIPreEmitPeepholePass(*PR);
- initializeSILateBranchLoweringPass(*PR);
+ initializeSILateBranchLoweringLegacyPass(*PR);
initializeSIMemoryLegalizerLegacyPass(*PR);
initializeSIOptimizeExecMaskingLegacyPass(*PR);
initializeSIPreAllocateWWMRegsLegacyPass(*PR);
@@ -2161,7 +2161,8 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
// TODO: addPass(SIInsertHardClausesPass());
}
- // addPass(SILateBranchLoweringPass());
+ addPass(SILateBranchLoweringPass());
+
if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less)) {
// TODO: addPass(AMDGPUSetWavePriorityPass());
}
diff --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
index d02173f57ee37..0f5b6bd9374b0 100644
--- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
@@ -16,6 +16,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachinePassManager.h"
using namespace llvm;
@@ -23,7 +24,7 @@ using namespace llvm;
namespace {
-class SILateBranchLowering : public MachineFunctionPass {
+class SILateBranchLowering {
private:
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
@@ -33,14 +34,23 @@ class SILateBranchLowering : public MachineFunctionPass {
void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock);
public:
- static char ID;
+ SILateBranchLowering(MachineDominatorTree *MDT) : MDT(MDT) {}
+
+ bool run(MachineFunction &MF);
unsigned MovOpc;
Register ExecReg;
+};
- SILateBranchLowering() : MachineFunctionPass(ID) {}
+class SILateBranchLoweringLegacy : public MachineFunctionPass {
+public:
+ static char ID;
+ SILateBranchLoweringLegacy() : MachineFunctionPass(ID) {}
- bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ auto *MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+ return SILateBranchLowering(MDT).run(MF);
+ }
StringRef getPassName() const override {
return "SI Final Branch Preparation";
@@ -55,15 +65,15 @@ class SILateBranchLowering : public MachineFunctionPass {
} // end anonymous namespace
-char SILateBranchLowering::ID = 0;
+char SILateBranchLoweringLegacy::ID = 0;
-INITIALIZE_PASS_BEGIN(SILateBranchLowering, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(SILateBranchLoweringLegacy, DEBUG_TYPE,
"SI insert s_cbranch_execz instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
-INITIALIZE_PASS_END(SILateBranchLowering, DEBUG_TYPE,
+INITIALIZE_PASS_END(SILateBranchLoweringLegacy, DEBUG_TYPE,
"SI insert s_cbranch_execz instructions", false, false)
-char &llvm::SILateBranchLoweringPassID = SILateBranchLowering::ID;
+char &llvm::SILateBranchLoweringPassID = SILateBranchLoweringLegacy::ID;
static void generateEndPgm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
@@ -144,11 +154,21 @@ void SILateBranchLowering::earlyTerm(MachineInstr &MI,
MDT->insertEdge(&MBB, EarlyExitBlock);
}
-bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) {
+PreservedAnalyses
+llvm::SILateBranchLoweringPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ auto *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
+ if (!SILateBranchLowering(MDT).run(MF))
+ return PreservedAnalyses::all();
+
+ return getMachineFunctionPassPreservedAnalyses()
+ .preserve<MachineDominatorTreeAnalysis>();
+}
+
+bool SILateBranchLowering::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
- MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
diff --git a/llvm/test/CodeGen/AMDGPU/early-term.mir b/llvm/test/CodeGen/AMDGPU/early-term.mir
index 77bc9729ee845..3d75d405a46d3 100644
--- a/llvm/test/CodeGen/AMDGPU/early-term.mir
+++ b/llvm/test/CodeGen/AMDGPU/early-term.mir
@@ -2,6 +2,8 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX10 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX11 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -passes=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX11 %s
+
--- |
define amdgpu_ps void @early_term_scc0_end_block() {
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir b/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
index 6a286eafa6d58..a4c05aa781df7 100644
--- a/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
+++ b/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
@@ -1,4 +1,5 @@
# RUN: llc -o - %s -mtriple=amdgcn -mcpu=fiji -run-pass=si-late-branch-lowering -verify-machineinstrs | FileCheck -check-prefix=GCN %s
+# RUN: llc -o - %s -mtriple=amdgcn -mcpu=fiji -passes=si-late-branch-lowering -verify-machineinstrs | FileCheck -check-prefix=GCN %s
# GCN-LABEL: readlane_exec0
# GCN: bb.0
|
arsenm
approved these changes
Mar 10, 2025
79a590f
to
48ec9e7
Compare
70a3b58
to
03b1e8e
Compare
cdevadas
approved these changes
Mar 10, 2025
48ec9e7
to
78cee57
Compare
03b1e8e
to
f095da2
Compare
78cee57
to
3a314ee
Compare
Base automatically changed from
users/optimisan/preemit/port-si-insert-hard-clauses
to
main
March 25, 2025 10:03
f095da2
to
0d5f939
Compare
arsenm
approved these changes
Mar 26, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.