Skip to content

Commit fb3aa17

Browse files
author
Jingyue Wu
committed
[NVPTX] Disable performance optimizations when OptLevel==None
Reviewers: jholewinski, tra, eliben Subscribers: jholewinski, llvm-commits Differential Revision: http://reviews.llvm.org/D16874 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259749 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 0e71de1 commit fb3aa17

File tree

2 files changed

+48
-21
lines changed

2 files changed

+48
-21
lines changed

lib/Target/NVPTX/NVPTXTargetMachine.cpp

Lines changed: 36 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -143,14 +143,20 @@ class NVPTXPassConfig : public TargetPassConfig {
143143
void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
144144

145145
private:
146-
// if the opt level is aggressive, add GVN; otherwise, add EarlyCSE.
146+
// If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
147+
// function is only called in opt mode.
147148
void addEarlyCSEOrGVNPass();
149+
150+
// Add passes that propagate special memory spaces.
151+
void addMemorySpaceInferencePasses();
152+
153+
// Add passes that perform straight-line scalar optimizations.
154+
void addStraightLineScalarOptimizationPasses();
148155
};
149156
} // end anonymous namespace
150157

151158
TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
152-
NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
153-
return PassConfig;
159+
return new NVPTXPassConfig(this, PM);
154160
}
155161

156162
TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
@@ -166,22 +172,7 @@ void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
166172
addPass(createEarlyCSEPass());
167173
}
168174

169-
void NVPTXPassConfig::addIRPasses() {
170-
// The following passes are known to not play well with virtual regs hanging
171-
// around after register allocation (which in our case, is *all* registers).
172-
// We explicitly disable them here. We do, however, need some functionality
173-
// of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
174-
// NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
175-
disablePass(&PrologEpilogCodeInserterID);
176-
disablePass(&MachineCopyPropagationID);
177-
disablePass(&TailDuplicateID);
178-
179-
addPass(createNVVMReflectPass());
180-
addPass(createNVPTXImageOptimizerPass());
181-
addPass(createNVPTXAssignValidGlobalNamesPass());
182-
addPass(createGenericToNVVMPass());
183-
184-
// === Propagate special address spaces ===
175+
void NVPTXPassConfig::addMemorySpaceInferencePasses() {
185176
addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
186177
// NVPTXLowerKernelArgs emits alloca for byval parameters which can often
187178
// be eliminated by SROA.
@@ -192,8 +183,9 @@ void NVPTXPassConfig::addIRPasses() {
192183
// them unused. We could remove dead code in an ad-hoc manner, but that
193184
// requires manual work and might be error-prone.
194185
addPass(createDeadCodeEliminationPass());
186+
}
195187

196-
// === Straight-line scalar optimizations ===
188+
void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
197189
addPass(createSeparateConstOffsetFromGEPPass());
198190
addPass(createSpeculativeExecutionPass());
199191
// ReassociateGEPs exposes more opportunities for SLSR. See
@@ -208,6 +200,28 @@ void NVPTXPassConfig::addIRPasses() {
208200
// NaryReassociate on GEPs creates redundant common expressions, so run
209201
// EarlyCSE after it.
210202
addPass(createEarlyCSEPass());
203+
}
204+
205+
void NVPTXPassConfig::addIRPasses() {
206+
// The following passes are known to not play well with virtual regs hanging
207+
// around after register allocation (which in our case, is *all* registers).
208+
// We explicitly disable them here. We do, however, need some functionality
209+
// of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
210+
// NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
211+
disablePass(&PrologEpilogCodeInserterID);
212+
disablePass(&MachineCopyPropagationID);
213+
disablePass(&TailDuplicateID);
214+
215+
addPass(createNVVMReflectPass());
216+
if (getOptLevel() != CodeGenOpt::None)
217+
addPass(createNVPTXImageOptimizerPass());
218+
addPass(createNVPTXAssignValidGlobalNamesPass());
219+
addPass(createGenericToNVVMPass());
220+
221+
if (getOptLevel() != CodeGenOpt::None) {
222+
addMemorySpaceInferencePasses();
223+
addStraightLineScalarOptimizationPasses();
224+
}
211225

212226
// === LSR and other generic IR passes ===
213227
TargetPassConfig::addIRPasses();
@@ -223,7 +237,8 @@ void NVPTXPassConfig::addIRPasses() {
223237
// %1 = shl %a, 2
224238
//
225239
// but EarlyCSE can do neither of them.
226-
addEarlyCSEOrGVNPass();
240+
if (getOptLevel() != CodeGenOpt::None)
241+
addEarlyCSEOrGVNPass();
227242
}
228243

229244
bool NVPTXPassConfig::addInstSelector() {

test/CodeGen/NVPTX/disable-opt.ll

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -O0 | FileCheck %s
2+
3+
define void @foo(i32* %output) {
4+
; CHECK-LABEL: .visible .func foo(
5+
entry:
6+
%local = alloca i32
7+
; CHECK: __local_depot
8+
store i32 1, i32* %local
9+
%0 = load i32, i32* %local
10+
store i32 %0, i32* %output
11+
ret void
12+
}

0 commit comments

Comments
 (0)