Skip to content

Commit 185d421

Browse files
committed
AMDGPU/NewPM: Start filling out addIRPasses
This is not complete, but gets AtomicExpand running. I was able to get further than I expected; we're quite close to having all the IR codegen passes ported.
1 parent 9aff06f commit 185d421

File tree

3 files changed

+110
-0
lines changed

3 files changed

+110
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,24 @@
88

99
#include "AMDGPUCodeGenPassBuilder.h"
1010
#include "AMDGPU.h"
11+
#include "AMDGPUCtorDtorLowering.h"
1112
#include "AMDGPUISelDAGToDAG.h"
1213
#include "AMDGPUPerfHintAnalysis.h"
1314
#include "AMDGPUTargetMachine.h"
1415
#include "AMDGPUUnifyDivergentExitNodes.h"
1516
#include "SIFixSGPRCopies.h"
1617
#include "llvm/Analysis/UniformityAnalysis.h"
18+
#include "llvm/Transforms/IPO/AlwaysInliner.h"
19+
#include "llvm/Transforms/IPO/ExpandVariadics.h"
20+
#include "llvm/Transforms/Scalar/EarlyCSE.h"
1721
#include "llvm/Transforms/Scalar/FlattenCFG.h"
22+
#include "llvm/Transforms/Scalar/GVN.h"
23+
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
24+
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
25+
#include "llvm/Transforms/Scalar/NaryReassociate.h"
26+
#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
1827
#include "llvm/Transforms/Scalar/Sink.h"
28+
#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
1929
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
2030
#include "llvm/Transforms/Utils/FixIrreducible.h"
2131
#include "llvm/Transforms/Utils/LCSSA.h"
@@ -38,6 +48,70 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
3848
ShadowStackGCLoweringPass>();
3949
}
4050

51+
/// Adds the AMDGPU IR-level codegen passes to \p addPass. Target-specific
/// passes are added first, followed by the passes contributed by
/// Base::addIRPasses, and finally an optional EarlyCSE/GVN cleanup.
/// Several legacy passes are not yet ported; see the TODOs below.
void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const {
52+
// TODO: Missing AMDGPURemoveIncompatibleFunctions
53+
54+
addPass(AMDGPUPrintfRuntimeBindingPass());
55+
if (LowerCtorDtor)
56+
addPass(AMDGPUCtorDtorLoweringPass());
57+
58+
if (isPassEnabled(EnableImageIntrinsicOptimizer))
59+
addPass(AMDGPUImageIntrinsicOptimizerPass(TM));
60+
61+
// This can be disabled by passing ::Disable here or on the command line
62+
// with --expand-variadics-override=disable.
63+
addPass(ExpandVariadicsPass(ExpandVariadicsMode::Lowering));
64+
65+
addPass(AMDGPUAlwaysInlinePass());
66+
addPass(AlwaysInlinerPass());
67+
68+
// TODO: Missing OpenCLEnqueuedBlockLowering
69+
70+
// Runs before PromoteAlloca so the latter can account for function uses
71+
if (EnableLowerModuleLDS)
72+
addPass(AMDGPULowerModuleLDSPass(TM));
73+
74+
if (TM.getOptLevel() > CodeGenOptLevel::None)
75+
addPass(InferAddressSpacesPass());
76+
77+
// Run atomic optimizer before Atomic Expand
78+
if (TM.getOptLevel() >= CodeGenOptLevel::Less &&
79+
(AMDGPUAtomicOptimizerStrategy != ScanOptions::None))
80+
addPass(AMDGPUAtomicOptimizerPass(TM, AMDGPUAtomicOptimizerStrategy));
81+
82+
addPass(AtomicExpandPass());
83+
84+
// The passes in this block are only added when optimizing.
if (TM.getOptLevel() > CodeGenOptLevel::None) {
85+
addPass(AMDGPUPromoteAllocaPass(TM));
86+
if (isPassEnabled(EnableScalarIRPasses))
87+
addStraightLineScalarOptimizationPasses(addPass);
88+
89+
// TODO: Handle EnableAMDGPUAliasAnalysis
90+
91+
// TODO: May want to move later or split into an early and late one.
92+
addPass(AMDGPUCodeGenPreparePass(TM));
93+
94+
// TODO: LICM
95+
}
96+
97+
// Append the IR passes provided by the base pass builder.
Base::addIRPasses(addPass);
98+
99+
// EarlyCSE is not always strong enough to clean up what LSR produces. For
100+
// example, GVN can combine
101+
//
102+
// %0 = add %a, %b
103+
// %1 = add %b, %a
104+
//
105+
// and
106+
//
107+
// %0 = shl nsw %a, 2
108+
// %1 = shl %a, 2
109+
//
110+
// but EarlyCSE can do neither of them.
111+
if (isPassEnabled(EnableScalarIRPasses))
112+
addEarlyCSEOrGVNPass(addPass);
113+
}
114+
41115
void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
42116
// AMDGPUAnnotateKernelFeaturesPass is missing here, but it will hopefully be
43117
// deleted soon.
@@ -136,6 +210,36 @@ Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
136210
return Error::success();
137211
}
138212

213+
/// Adds a redundancy-elimination pass to \p addPass: GVNPass when the target
/// machine's optimization level is Aggressive, EarlyCSEPass otherwise.
void AMDGPUCodeGenPassBuilder::addEarlyCSEOrGVNPass(AddIRPass &addPass) const {
214+
if (TM.getOptLevel() == CodeGenOptLevel::Aggressive)
215+
addPass(GVNPass());
216+
else
217+
addPass(EarlyCSEPass());
218+
}
219+
220+
/// Adds the straight-line scalar optimization sequence to \p addPass:
/// LoopDataPrefetch (only if isPassEnabled(EnableLoopPrefetch, Aggressive)
/// holds), SeparateConstOffsetFromGEP, StraightLineStrengthReduce, EarlyCSE or
/// GVN, NaryReassociate, and a final EarlyCSE.
void AMDGPUCodeGenPassBuilder::addStraightLineScalarOptimizationPasses(
221+
AddIRPass &addPass) const {
222+
if (isPassEnabled(EnableLoopPrefetch, CodeGenOptLevel::Aggressive))
223+
addPass(LoopDataPrefetchPass());
224+
225+
addPass(SeparateConstOffsetFromGEPPass());
226+
227+
// ReassociateGEPs exposes more opportunities for SLSR. See
228+
// the example in reassociate-geps-and-slsr.ll.
229+
addPass(StraightLineStrengthReducePass());
230+
231+
// SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
232+
// EarlyCSE can reuse.
233+
addEarlyCSEOrGVNPass(addPass);
234+
235+
// Run NaryReassociate after EarlyCSE/GVN to be more effective.
236+
addPass(NaryReassociatePass());
237+
238+
// NaryReassociate on GEPs creates redundant common expressions, so run
239+
// EarlyCSE after it.
240+
addPass(EarlyCSEPass());
241+
}
242+
139243
bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
140244
CodeGenOptLevel Level) const {
141245
if (Opt.getNumOccurrences())

llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,16 @@ class AMDGPUCodeGenPassBuilder
2424
AMDGPUCodeGenPassBuilder(GCNTargetMachine &TM,
2525
const CGPassBuilderOption &Opts,
2626
PassInstrumentationCallbacks *PIC);
27+
28+
void addIRPasses(AddIRPass &) const;
2729
void addCodeGenPrepare(AddIRPass &) const;
2830
void addPreISel(AddIRPass &addPass) const;
2931
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
3032
Error addInstSelector(AddMachinePass &) const;
3133

34+
/// Add GVN at the Aggressive optimization level, EarlyCSE otherwise.
void addEarlyCSEOrGVNPass(AddIRPass &) const;
35+
/// Add the straight-line scalar IR optimizations (SeparateConstOffsetFromGEP,
/// StraightLineStrengthReduce, NaryReassociate and CSE/GVN cleanups).
void addStraightLineScalarOptimizationPasses(AddIRPass &) const;
36+
3237
/// Check if a pass is enabled given \p Opt option. The option always
3338
/// overrides defaults if explicitly used. Otherwise its default will
3439
/// be used given that a pass shall work at an optimization \p Level

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
#include "llvm/Transforms/Scalar.h"
6767
#include "llvm/Transforms/Scalar/GVN.h"
6868
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
69+
#include "llvm/Transforms/Scalar/LICM.h"
6970
#include "llvm/Transforms/Utils.h"
7071
#include "llvm/Transforms/Utils/LowerSwitch.h"
7172
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"

0 commit comments

Comments
 (0)