Skip to content

Commit 6dca839

Browse files
[NVPTX] Directly control the Machine SSA passes that are invoked for NVPTX.
NVPTX is a bit special in the optimizations it requires, so this gives us better control over the backend optimization pipeline. llvm-svn: 211927
1 parent 7d5bf66 commit 6dca839

File tree

1 file changed

+41
-0
lines changed

1 file changed

+41
-0
lines changed

llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,7 @@ class NVPTXPassConfig : public TargetPassConfig {
106106
bool addInstSelector() override;
107107
bool addPreRegAlloc() override;
108108
bool addPostRegAlloc() override;
109+
void addMachineSSAOptimization() override;
109110

110111
FunctionPass *createTargetRegisterAllocator(bool) override;
111112
void addFastRegAlloc(FunctionPass *RegAllocPass) override;
@@ -207,3 +208,43 @@ void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
207208

208209
printAndVerify("After StackSlotColoring");
209210
}
211+
212+
void NVPTXPassConfig::addMachineSSAOptimization() {
213+
// Pre-ra tail duplication.
214+
if (addPass(&EarlyTailDuplicateID))
215+
printAndVerify("After Pre-RegAlloc TailDuplicate");
216+
217+
// Optimize PHIs before DCE: removing dead PHI cycles may make more
218+
// instructions dead.
219+
addPass(&OptimizePHIsID);
220+
221+
// This pass merges large allocas. StackSlotColoring is a different pass
222+
// which merges spill slots.
223+
addPass(&StackColoringID);
224+
225+
// If the target requests it, assign local variables to stack slots relative
226+
// to one another and simplify frame index references where possible.
227+
addPass(&LocalStackSlotAllocationID);
228+
229+
// With optimization, dead code should already be eliminated. However
230+
// there is one known exception: lowered code for arguments that are only
231+
// used by tail calls, where the tail calls reuse the incoming stack
232+
// arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
233+
addPass(&DeadMachineInstructionElimID);
234+
printAndVerify("After codegen DCE pass");
235+
236+
// Allow targets to insert passes that improve instruction level parallelism,
237+
// like if-conversion. Such passes will typically need dominator trees and
238+
// loop info, just like LICM and CSE below.
239+
if (addILPOpts())
240+
printAndVerify("After ILP optimizations");
241+
242+
addPass(&MachineLICMID);
243+
addPass(&MachineCSEID);
244+
245+
addPass(&MachineSinkingID);
246+
printAndVerify("After Machine LICM, CSE and Sinking passes");
247+
248+
addPass(&PeepholeOptimizerID);
249+
printAndVerify("After codegen peephole optimization pass");
250+
}

0 commit comments

Comments
 (0)