Skip to content

Commit c564ce0

Browse files
committed
AMDGPU/NewPM: Start filling out addIRPasses
This is not complete, but gets AtomicExpand running. I was able to get further than I expected; we're quite close to having all the IR codegen passes ported.
1 parent afeef4d commit c564ce0

File tree

2 files changed

+103
-0
lines changed

2 files changed

+103
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,16 @@
6565
#include "llvm/Transforms/IPO/GlobalDCE.h"
6666
#include "llvm/Transforms/IPO/Internalize.h"
6767
#include "llvm/Transforms/Scalar.h"
68+
#include "llvm/Transforms/Scalar/EarlyCSE.h"
6869
#include "llvm/Transforms/Scalar/FlattenCFG.h"
6970
#include "llvm/Transforms/Scalar/GVN.h"
7071
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
72+
#include "llvm/Transforms/Scalar/LICM.h"
73+
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
74+
#include "llvm/Transforms/Scalar/NaryReassociate.h"
75+
#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
7176
#include "llvm/Transforms/Scalar/Sink.h"
77+
#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
7278
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
7379
#include "llvm/Transforms/Utils.h"
7480
#include "llvm/Transforms/Utils/FixIrreducible.h"
@@ -1769,6 +1775,70 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
17691775
ShadowStackGCLoweringPass>();
17701776
}
17711777

1778+
/// Adds the AMDGPU IR-level codegen passes to the pipeline, in the order the
/// addPass calls appear below.
///
/// The commit message describes this port as incomplete; the TODO comments
/// mark passes that are not added yet. The AMDGPU-specific passes come first,
/// then Base::addIRPasses is invoked, then one more EarlyCSE/GVN cleanup is
/// added when scalar IR passes are enabled.
///
/// \param addPass Callback invoked with each pass object to add.
void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const {
1779+
// TODO: Missing AMDGPURemoveIncompatibleFunctions
1780+
1781+
// Added unconditionally; the ctor/dtor lowering that follows is gated on the
// LowerCtorDtor option.
addPass(AMDGPUPrintfRuntimeBindingPass());
1782+
if (LowerCtorDtor)
1783+
addPass(AMDGPUCtorDtorLoweringPass());
1784+
1785+
if (isPassEnabled(EnableImageIntrinsicOptimizer))
1786+
addPass(AMDGPUImageIntrinsicOptimizerPass(TM));
1787+
1788+
// This can be disabled by passing ::Disable here or on the command line
1789+
// with --expand-variadics-override=disable.
1790+
addPass(ExpandVariadicsPass(ExpandVariadicsMode::Lowering));
1791+
1792+
// AMDGPUAlwaysInlinePass is added immediately before the generic
// AlwaysInlinerPass.
// NOTE(review): presumably the former marks functions that the latter then
// inlines -- confirm against the pass implementations.
addPass(AMDGPUAlwaysInlinePass());
1793+
addPass(AlwaysInlinerPass());
1794+
1795+
// TODO: Missing OpenCLEnqueuedBlockLowering
1796+
1797+
// Runs before PromoteAlloca so the latter can account for function uses
1798+
if (EnableLowerModuleLDS)
1799+
addPass(AMDGPULowerModuleLDSPass(TM));
1800+
1801+
// Only added when optimizing (any level above None).
if (TM.getOptLevel() > CodeGenOptLevel::None)
1802+
addPass(InferAddressSpacesPass());
1803+
1804+
// Run atomic optimizer before Atomic Expand. It is skipped below
// CodeGenOptLevel::Less or when the strategy option is ScanOptions::None.
1805+
if (TM.getOptLevel() >= CodeGenOptLevel::Less &&
1806+
(AMDGPUAtomicOptimizerStrategy != ScanOptions::None))
1807+
addPass(AMDGPUAtomicOptimizerPass(TM, AMDGPUAtomicOptimizerStrategy));
1808+
1809+
// AtomicExpand is added at every optimization level.
addPass(AtomicExpandPass());
1810+
1811+
// Everything in this block is skipped at CodeGenOptLevel::None.
if (TM.getOptLevel() > CodeGenOptLevel::None) {
1812+
addPass(AMDGPUPromoteAllocaPass(TM));
1813+
if (isPassEnabled(EnableScalarIRPasses))
1814+
addStraightLineScalarOptimizationPasses(addPass);
1815+
1816+
// TODO: Handle EnableAMDGPUAliasAnalysis
1817+
1818+
// TODO: May want to move later or split into an early and late one.
1819+
addPass(AMDGPUCodeGenPreparePass(TM));
1820+
1821+
// TODO: LICM
1822+
}
1823+
1824+
// Delegate to the base builder for its IR passes; what it adds is not
// visible in this file section.
Base::addIRPasses(addPass);
1825+
1826+
// EarlyCSE is not always strong enough to clean up what LSR produces. For
1827+
// example, GVN can combine
1828+
//
1829+
// %0 = add %a, %b
1830+
// %1 = add %b, %a
1831+
//
1832+
// and
1833+
//
1834+
// %0 = shl nsw %a, 2
1835+
// %1 = shl %a, 2
1836+
//
1837+
// but EarlyCSE can do neither of them.
// NOTE(review): LSR is not added in this function; presumably it comes from
// Base::addIRPasses above -- confirm.
1838+
if (isPassEnabled(EnableScalarIRPasses))
1839+
addEarlyCSEOrGVNPass(addPass);
1840+
}
1841+
17721842
void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
17731843
// AMDGPUAnnotateKernelFeaturesPass is missing here, but it will hopefully be
17741844
// deleted soon.
@@ -1875,3 +1945,33 @@ bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
18751945
return false;
18761946
return Opt;
18771947
}
1948+
1949+
/// Adds a single redundancy-elimination pass chosen by optimization level:
/// GVNPass when the target machine's level is CodeGenOptLevel::Aggressive,
/// EarlyCSEPass at every other level.
///
/// \param addPass Callback invoked with the selected pass object.
void AMDGPUCodeGenPassBuilder::addEarlyCSEOrGVNPass(AddIRPass &addPass) const {
1950+
if (TM.getOptLevel() == CodeGenOptLevel::Aggressive)
1951+
addPass(GVNPass());
1952+
else
1953+
addPass(EarlyCSEPass());
1954+
}
1955+
1956+
/// Adds the straight-line scalar optimization sequence, in this fixed order:
/// optional LoopDataPrefetch, SeparateConstOffsetFromGEP,
/// StraightLineStrengthReduce, EarlyCSE-or-GVN, NaryReassociate, EarlyCSE.
///
/// The order is significant: the comments below explain why each cleanup pass
/// follows the transformation before it.
///
/// \param addPass Callback invoked with each pass object to add.
void AMDGPUCodeGenPassBuilder::addStraightLineScalarOptimizationPasses(
1957+
AddIRPass &addPass) const {
1958+
// Loop data prefetch is requested with a minimum level of Aggressive in
// addition to its own enable option.
if (isPassEnabled(EnableLoopPrefetch, CodeGenOptLevel::Aggressive))
1959+
addPass(LoopDataPrefetchPass());
1960+
1961+
addPass(SeparateConstOffsetFromGEPPass());
1962+
1963+
// ReassociateGEPs exposes more opportunities for SLSR. See
1964+
// the example in reassociate-geps-and-slsr.ll.
// NOTE(review): no pass named ReassociateGEPs is added here; presumably this
// refers to SeparateConstOffsetFromGEP above -- confirm.
1965+
addPass(StraightLineStrengthReducePass());
1966+
1967+
// SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
1968+
// EarlyCSE can reuse.
1969+
addEarlyCSEOrGVNPass(addPass);
1970+
1971+
// Run NaryReassociate after EarlyCSE/GVN to be more effective.
1972+
addPass(NaryReassociatePass());
1973+
1974+
// NaryReassociate on GEPs creates redundant common expressions, so run
1975+
// EarlyCSE after it.
1976+
addPass(EarlyCSEPass());
1977+
}

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ class AMDGPUCodeGenPassBuilder
172172
const CGPassBuilderOption &Opts,
173173
PassInstrumentationCallbacks *PIC);
174174

175+
void addIRPasses(AddIRPass &) const;
175176
void addCodeGenPrepare(AddIRPass &) const;
176177
void addPreISel(AddIRPass &addPass) const;
177178
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
@@ -182,6 +183,8 @@ class AMDGPUCodeGenPassBuilder
182183
/// given that a pass shall work at an optimization \p Level minimum.
183184
bool isPassEnabled(const cl::opt<bool> &Opt,
184185
CodeGenOptLevel Level = CodeGenOptLevel::Default) const;
186+
void addEarlyCSEOrGVNPass(AddIRPass &) const;
187+
void addStraightLineScalarOptimizationPasses(AddIRPass &) const;
185188
};
186189

187190
} // end namespace llvm

0 commit comments

Comments
 (0)