Skip to content

Commit bc07cf6

Browse files
committed
AMDGPU/NewPM: Fill out passes in addCodeGenPrepare
AMDGPUAnnotateKernelFeatures hasn't been ported yet, but it should be removable soon.
1 parent dc7ab09 commit bc07cf6

File tree

2 files changed

+45
-0
lines changed

2 files changed

+45
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@
2121
#include "llvm/Transforms/Utils/LCSSA.h"
2222
#include "llvm/Transforms/Utils/LowerSwitch.h"
2323
#include "llvm/Transforms/Utils/UnifyLoopExits.h"
24+
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
2425

2526
using namespace llvm;
27+
using namespace llvm::AMDGPU;
2628

2729
AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
2830
GCNTargetMachine &TM, const CGPassBuilderOption &Opts,
@@ -37,8 +39,35 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
3739
}
3840

3941
void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
42+
// AMDGPUAnnotateKernelFeaturesPass is missing here, but it will hopefully be
43+
// deleted soon.
44+
45+
if (EnableLowerKernelArguments)
46+
addPass(AMDGPULowerKernelArgumentsPass(TM));
47+
48+
// This lowering has been placed after codegenprepare to take advantage of
49+
// address mode matching (which is why it isn't put with the LDS lowerings).
50+
// It could be placed anywhere before uniformity annotations (an analysis
51+
// that it changes by splitting up fat pointers into their components)
52+
// but has been put before switch lowering and CFG flattening so that those
53+
// passes can run on the more optimized control flow this pass creates in
54+
// many cases.
55+
//
56+
// FIXME: This should ideally be put after the LoadStoreVectorizer.
57+
// However, due to some annoying facts about ResourceUsageAnalysis,
58+
// (especially as exercised in the resource-usage-dead-function test),
59+
// we need all the function passes from codegenprepare all the way through
60+
// said resource usage analysis to run on the call graph produced
61+
// before codegenprepare runs (because codegenprepare will knock some
62+
// nodes out of the graph, which leads to function-level passes not
63+
// being run on them, which causes crashes in the resource usage analysis).
64+
addPass(AMDGPULowerBufferFatPointersPass(TM));
65+
4066
Base::addCodeGenPrepare(addPass);
4167

68+
if (isPassEnabled(EnableLoadStoreVectorizer))
69+
addPass(LoadStoreVectorizerPass());
70+
4271
// LowerSwitch pass may introduce unreachable blocks that can cause unexpected
4372
// behavior for subsequent passes. Placing it here seems better so that these
4473
// blocks would get cleaned up by UnreachableBlockElim inserted next in the
@@ -106,3 +135,12 @@ Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
106135
addPass(SILowerI1CopiesPass());
107136
return Error::success();
108137
}
138+
139+
// Decide whether the pass guarded by \p Opt should be added. An explicitly
// used option always wins; otherwise the pass is disabled when the target
// machine's optimization level is below \p Level, and the option's default
// value is used when it is not.
bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
140+
CodeGenOptLevel Level) const {
141+
// The option occurred at least once: honor its value regardless of the
// optimization level.
if (Opt.getNumOccurrences())
142+
return Opt;
143+
// Not explicitly set and the optimization level is below the required
// minimum: the pass stays off.
if (TM.getOptLevel() < Level)
144+
return false;
145+
// Not explicitly set and the optimization level is sufficient: fall back to
// the option's current (default) value.
return Opt;
146+
}

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,13 @@ class AMDGPUCodeGenPassBuilder
2828
void addPreISel(AddIRPass &addPass) const;
2929
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
3030
Error addInstSelector(AddMachinePass &) const;
31+
32+
/// Check if a pass is enabled given \p Opt option. The option always
33+
/// overrides defaults if explicitly used. Otherwise its default will
34+
/// be used given that a pass shall work at an optimization \p Level
35+
/// minimum.
36+
bool isPassEnabled(const cl::opt<bool> &Opt,
37+
CodeGenOptLevel Level = CodeGenOptLevel::Default) const;
3138
};
3239

3340
} // namespace llvm

0 commit comments

Comments
 (0)