#include "AMDGPUCodeGenPassBuilder.h"
#include "AMDGPU.h"
+ #include "AMDGPUCtorDtorLowering.h"
#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUUnifyDivergentExitNodes.h"
#include "SIFixSGPRCopies.h"
#include "llvm/Analysis/UniformityAnalysis.h"
+ #include "llvm/Transforms/IPO/AlwaysInliner.h"
+ #include "llvm/Transforms/IPO/ExpandVariadics.h"
+ #include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/FlattenCFG.h"
+ #include "llvm/Transforms/Scalar/GVN.h"
+ #include "llvm/Transforms/Scalar/InferAddressSpaces.h"
+ #include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
+ #include "llvm/Transforms/Scalar/NaryReassociate.h"
+ #include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/Transforms/Scalar/Sink.h"
+ #include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/Transforms/Utils/LCSSA.h"
@@ -38,6 +48,70 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
              ShadowStackGCLoweringPass>();
}

+ void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const {
+   // TODO: Missing AMDGPURemoveIncompatibleFunctions
+
+   addPass(AMDGPUPrintfRuntimeBindingPass());
+   if (LowerCtorDtor)
+     addPass(AMDGPUCtorDtorLoweringPass());
+
+   if (isPassEnabled(EnableImageIntrinsicOptimizer))
+     addPass(AMDGPUImageIntrinsicOptimizerPass(TM));
+
+   // This can be disabled by passing ::Disable here or on the command line
+   // with --expand-variadics-override=disable.
+   addPass(ExpandVariadicsPass(ExpandVariadicsMode::Lowering));
+
+   addPass(AMDGPUAlwaysInlinePass());
+   addPass(AlwaysInlinerPass());
+
+   // TODO: Missing OpenCLEnqueuedBlockLowering
+
+   // Runs before PromoteAlloca so the latter can account for function uses
+   if (EnableLowerModuleLDS)
+     addPass(AMDGPULowerModuleLDSPass(TM));
+
+   if (TM.getOptLevel() > CodeGenOptLevel::None)
+     addPass(InferAddressSpacesPass());
+
+   // Run atomic optimizer before Atomic Expand
+   if (TM.getOptLevel() >= CodeGenOptLevel::Less &&
+       (AMDGPUAtomicOptimizerStrategy != ScanOptions::None))
+     addPass(AMDGPUAtomicOptimizerPass(TM, AMDGPUAtomicOptimizerStrategy));
+
+   addPass(AtomicExpandPass());
+
+   if (TM.getOptLevel() > CodeGenOptLevel::None) {
+     addPass(AMDGPUPromoteAllocaPass(TM));
+     if (isPassEnabled(EnableScalarIRPasses))
+       addStraightLineScalarOptimizationPasses(addPass);
+
+     // TODO: Handle EnableAMDGPUAliasAnalysis
+
+     // TODO: May want to move later or split into an early and late one.
+     addPass(AMDGPUCodeGenPreparePass(TM));
+
+     // TODO: LICM
+   }
+
+   Base::addIRPasses(addPass);
+
+   // EarlyCSE is not always strong enough to clean up what LSR produces. For
+   // example, GVN can combine
+   //
+   //   %0 = add %a, %b
+   //   %1 = add %b, %a
+   //
+   // and
+   //
+   //   %0 = shl nsw %a, 2
+   //   %1 = shl %a, 2
+   //
+   // but EarlyCSE can do neither of them.
+   if (isPassEnabled(EnableScalarIRPasses))
+     addEarlyCSEOrGVNPass(addPass);
+ }
+
void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
  // AMDGPUAnnotateKernelFeaturesPass is missing here, but it will hopefully be
  // deleted soon.
@@ -136,6 +210,36 @@ Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
  return Error::success();
}

+ void AMDGPUCodeGenPassBuilder::addEarlyCSEOrGVNPass(AddIRPass &addPass) const {
+   if (TM.getOptLevel() == CodeGenOptLevel::Aggressive)
+     addPass(GVNPass());
+   else
+     addPass(EarlyCSEPass());
+ }
+
+ void AMDGPUCodeGenPassBuilder::addStraightLineScalarOptimizationPasses(
+     AddIRPass &addPass) const {
+   if (isPassEnabled(EnableLoopPrefetch, CodeGenOptLevel::Aggressive))
+     addPass(LoopDataPrefetchPass());
+
+   addPass(SeparateConstOffsetFromGEPPass());
+
+   // ReassociateGEPs exposes more opportunities for SLSR. See
+   // the example in reassociate-geps-and-slsr.ll.
+   addPass(StraightLineStrengthReducePass());
+
+   // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
+   // EarlyCSE can reuse.
+   addEarlyCSEOrGVNPass(addPass);
+
+   // Run NaryReassociate after EarlyCSE/GVN to be more effective.
+   addPass(NaryReassociatePass());
+
+   // NaryReassociate on GEPs creates redundant common expressions, so run
+   // EarlyCSE after it.
+   addPass(EarlyCSEPass());
+ }
+
bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
                                             CodeGenOptLevel Level) const {
  if (Opt.getNumOccurrences())