Skip to content

Commit 208332d

Browse files
committed
[AMDGPU] Add Optimize VGPR LiveRange Pass.
This pass aims to optimize the VGPR live range in a typical divergent if-else control flow. For example:

  def(a)
  if(cond)
    use(a)
    ... // A
  else
    use(a)

Because AMDGPU accesses VGPRs with respect to the active mask, we can mark `a` as dead in region A. For details, please refer to the comments in the implementation file. The pass is enabled by default; the frontend can disable it with "-amdgpu-opt-vgpr-liverange=false". Differential Revision: https://reviews.llvm.org/D102212
1 parent 11e9a72 commit 208332d

15 files changed

+3064
-2135
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ FunctionPass *createSILoadStoreOptimizerPass();
5656
FunctionPass *createSIWholeQuadModePass();
5757
FunctionPass *createSIFixControlFlowLiveIntervalsPass();
5858
FunctionPass *createSIOptimizeExecMaskingPreRAPass();
59+
FunctionPass *createSIOptimizeVGPRLiveRangePass();
5960
FunctionPass *createSIFixSGPRCopiesPass();
6061
FunctionPass *createSIMemoryLegalizerPass();
6162
FunctionPass *createSIInsertWaitcntsPass();
@@ -297,6 +298,9 @@ struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
297298
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
298299
extern char &SIOptimizeExecMaskingPreRAID;
299300

301+
void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &);
302+
extern char &SIOptimizeVGPRLiveRangeID;
303+
300304
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
301305
extern char &AMDGPUAnnotateUniformValuesPassID;
302306

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,11 @@ static cl::opt<bool> EnableRegReassign(
162162
cl::init(true),
163163
cl::Hidden);
164164

165+
static cl::opt<bool> OptVGPRLiveRange(
166+
"amdgpu-opt-vgpr-liverange",
167+
cl::desc("Enable VGPR liverange optimizations for if-else structure"),
168+
cl::init(true), cl::Hidden);
169+
165170
// Enable atomic optimization
166171
static cl::opt<bool> EnableAtomicOptimizations(
167172
"amdgpu-atomic-optimizations",
@@ -225,6 +230,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
225230
initializeSIPeepholeSDWAPass(*PR);
226231
initializeSIShrinkInstructionsPass(*PR);
227232
initializeSIOptimizeExecMaskingPreRAPass(*PR);
233+
initializeSIOptimizeVGPRLiveRangePass(*PR);
228234
initializeSILoadStoreOptimizerPass(*PR);
229235
initializeAMDGPUFixFunctionBitcastsPass(*PR);
230236
initializeAMDGPUAlwaysInlinePass(*PR);
@@ -1190,6 +1196,12 @@ void GCNPassConfig::addOptimizedRegAlloc() {
11901196
if (TM->getOptLevel() > CodeGenOpt::Less)
11911197
insertPass(&MachineSchedulerID, &SIFormMemoryClausesID);
11921198

1199+
// FIXME: When an instruction that has a killed operand is inside a bundle,
1200+
// it seems that only the BUNDLE instruction appears among the kills of the
1201+
// register in LiveVariables. This would trigger a failure in the verifier.
1202+
// We should fix this and enable the verifier.
1203+
if (OptVGPRLiveRange)
1204+
insertPass(&LiveVariablesID, &SIOptimizeVGPRLiveRangeID, false);
11931205
// This must be run immediately after phi elimination and before
11941206
// TwoAddressInstructions, otherwise the processing of the tied operand of
11951207
// SI_ELSE will introduce a copy of the tied operand source after the else.

llvm/lib/Target/AMDGPU/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ add_llvm_target(AMDGPUCodeGen
132132
SIMemoryLegalizer.cpp
133133
SIOptimizeExecMasking.cpp
134134
SIOptimizeExecMaskingPreRA.cpp
135+
SIOptimizeVGPRLiveRange.cpp
135136
SIPeepholeSDWA.cpp
136137
SIPostRABundler.cpp
137138
SIPreEmitPeephole.cpp

0 commit comments

Comments
 (0)