21
21
#include " llvm/Transforms/Utils/LCSSA.h"
22
22
#include " llvm/Transforms/Utils/LowerSwitch.h"
23
23
#include " llvm/Transforms/Utils/UnifyLoopExits.h"
24
+ #include " llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
24
25
25
26
using namespace llvm ;
27
+ using namespace llvm ::AMDGPU;
26
28
27
29
AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder (
28
30
GCNTargetMachine &TM, const CGPassBuilderOption &Opts,
@@ -37,8 +39,35 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
37
39
}
38
40
39
41
void AMDGPUCodeGenPassBuilder::addCodeGenPrepare (AddIRPass &addPass) const {
42
+ // AMDGPUAnnotateKernelFeaturesPass is missing here, but it will hopefully be
43
+ // deleted soon.
44
+
45
+ if (EnableLowerKernelArguments)
46
+ addPass (AMDGPULowerKernelArgumentsPass (TM));
47
+
48
+ // This lowering has been placed after codegenprepare to take advantage of
49
+ // address mode matching (which is why it isn't put with the LDS lowerings).
50
+ // It could be placed anywhere before uniformity annotations (an analysis
51
+ // that it changes by splitting up fat pointers into their components)
52
+ // but has been put before switch lowering and CFG flattening so that those
53
+ // passes can run on the more optimized control flow this pass creates in
54
+ // many cases.
55
+ //
56
+ // FIXME: This should ideally be put after the LoadStoreVectorizer.
57
+ // However, due to some annoying facts about ResourceUsageAnalysis,
58
+ // (especially as exercised in the resource-usage-dead-function test),
59
+ // we need all the function passes codegenprepare all the way through
60
+ // said resource usage analysis to run on the call graph produced
61
+ // before codegenprepare runs (because codegenprepare will knock some
62
+ // nodes out of the graph, which leads to function-level passes not
63
+ // being run on them, which causes crashes in the resource usage analysis).
64
+ addPass (AMDGPULowerBufferFatPointersPass (TM));
65
+
40
66
Base::addCodeGenPrepare (addPass);
41
67
68
+ if (isPassEnabled (EnableLoadStoreVectorizer))
69
+ addPass (LoadStoreVectorizerPass ());
70
+
42
71
// LowerSwitch pass may introduce unreachable blocks that can cause unexpected
43
72
// behavior for subsequent passes. Placing it here seems better that these
44
73
// blocks would get cleaned up by UnreachableBlockElim inserted next in the
@@ -106,3 +135,12 @@ Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
106
135
addPass (SILowerI1CopiesPass ());
107
136
return Error::success ();
108
137
}
138
+
139
/// Decides whether an optional pass guarded by the boolean option \p Opt
/// should be added to the pipeline.
///
/// \param Opt   Boolean command-line option that controls the pass.
/// \param Level Optimization level compared against TM.getOptLevel() when
///              \p Opt has no recorded occurrences. NOTE(review): the call in
///              addCodeGenPrepare passes only \p Opt, so the declaration
///              presumably gives this parameter a default -- confirm in the
///              header.
/// \returns the value of \p Opt when Opt.getNumOccurrences() is non-zero;
///          otherwise false when TM.getOptLevel() is below \p Level;
///          otherwise the value of \p Opt.
+ bool AMDGPUCodeGenPassBuilder::isPassEnabled (const cl::opt<bool > &Opt,
140
+ CodeGenOptLevel Level) const {
141
// An option with recorded occurrences (presumably an explicit command-line
// setting) is honoured before the optimization level is consulted.
+ if (Opt.getNumOccurrences ())
142
+ return Opt;
143
// With no occurrences, the pass is disabled below the requested level.
+ if (TM.getOptLevel () < Level)
144
+ return false ;
145
// Otherwise fall back to the option's current value.
+ return Opt;
146
+ }
0 commit comments