Skip to content

Commit 908ac47

Browse files
committed
[NPM][LTO] Update buildLTODefaultPipeline to be more in line with the old pass manager
The NPM LTO pipeline has a lot of FIXMEs and missing passes, causing a lot of regressions after the switch in c70737b. Notably, unrolling and vectorization were both disabled, but many other passes are also missing compared to the old pass manager. This attempts to enable the most obvious missing passes, such as the unroller, vectorization and other loop passes, resolving the existing FIXME comments. Differential Revision: https://reviews.llvm.org/D96780
1 parent 0eeaec2 commit 908ac47

File tree

2 files changed

+140
-77
lines changed

2 files changed

+140
-77
lines changed

llvm/lib/Passes/PassBuilder.cpp

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1727,14 +1727,15 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
17271727
// Run a few AA driver optimizations here and now to cleanup the code.
17281728
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
17291729

1730-
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
1731-
PostOrderFunctionAttrsPass()));
1730+
MPM.addPass(
1731+
createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
17321732
// FIXME: here we run IP alias analysis in the legacy PM.
17331733

17341734
FunctionPassManager MainFPM;
17351735

1736-
// FIXME: once we fix LoopPass Manager, add LICM here.
1737-
// FIXME: once we provide support for enabling MLSM, add it here.
1736+
MainFPM.addPass(createFunctionToLoopPassAdaptor(
1737+
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)));
1738+
17381739
if (RunNewGVN)
17391740
MainFPM.addPass(NewGVNPass());
17401741
else
@@ -1745,24 +1746,57 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
17451746

17461747
// Nuke dead stores.
17471748
MainFPM.addPass(DSEPass());
1749+
MainFPM.addPass(MergedLoadStoreMotionPass());
17481750

1749-
// FIXME: at this point, we run a bunch of loop passes:
1750-
// indVarSimplify, loopDeletion, loopInterchange, loopUnroll,
1751-
// loopVectorize. Enable them once the remaining issue with LPM
1752-
// are sorted out.
1751+
// More loops are countable; try to optimize them.
1752+
if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
1753+
MainFPM.addPass(LoopFlattenPass());
1754+
1755+
if (EnableConstraintElimination)
1756+
MainFPM.addPass(ConstraintEliminationPass());
1757+
1758+
LoopPassManager LPM(DebugLogging);
1759+
LPM.addPass(IndVarSimplifyPass());
1760+
LPM.addPass(LoopDeletionPass());
1761+
// FIXME: Add loop interchange.
1762+
1763+
// Unroll small loops and perform peeling.
1764+
LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
1765+
/* OnlyWhenForced= */ !PTO.LoopUnrolling,
1766+
PTO.ForgetAllSCEVInLoopUnroll));
1767+
MainFPM.addPass(createFunctionToLoopPassAdaptor(
1768+
std::move(LPM), EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true,
1769+
DebugLogging));
1770+
1771+
MainFPM.addPass(LoopDistributePass());
1772+
MainFPM.addPass(LoopVectorizePass(
1773+
LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
1774+
// The vectorizer may have significantly shortened a loop body; unroll again.
1775+
MainFPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1776+
Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1777+
PTO.ForgetAllSCEVInLoopUnroll)));
1778+
1779+
MainFPM.addPass(WarnMissedTransformationsPass());
17531780

17541781
MainFPM.addPass(InstCombinePass());
17551782
MainFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)));
17561783
MainFPM.addPass(SCCPPass());
17571784
MainFPM.addPass(InstCombinePass());
17581785
MainFPM.addPass(BDCEPass());
17591786

1760-
// FIXME: We may want to run SLPVectorizer here.
1761-
// After vectorization, assume intrinsics may tell us more
1762-
// about pointer alignments.
1763-
#if 0
1764-
MainFPM.add(AlignmentFromAssumptionsPass());
1765-
#endif
1787+
// More scalar chains could be vectorized due to more alias information
1788+
if (PTO.SLPVectorization) {
1789+
MainFPM.addPass(SLPVectorizerPass());
1790+
if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1791+
MainFPM.addPass(EarlyCSEPass());
1792+
}
1793+
}
1794+
1795+
MainFPM.addPass(VectorCombinePass()); // Clean up partial vectorization.
1796+
1797+
// After vectorization, assume intrinsics may tell us more about pointer
1798+
// alignments.
1799+
MainFPM.addPass(AlignmentFromAssumptionsPass());
17661800

17671801
// FIXME: Conditionally run LoadCombine here, after it's ported
17681802
// (in case we still have this pass, given its questionable usefulness).

llvm/test/Other/new-pm-lto-defaults.ll

Lines changed: 92 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,22 @@
77
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O1
88
; RUN: opt -disable-verify -debug-pass-manager \
99
; RUN: -passes='lto<O2>' -S %s 2>&1 \
10-
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2
10+
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O23SZ \
11+
; RUN: --check-prefix=CHECK-O2
1112
; RUN: opt -disable-verify -debug-pass-manager \
1213
; RUN: -passes='lto<O3>' -S %s 2>&1 \
13-
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2 \
14+
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O23SZ \
1415
; RUN: --check-prefix=CHECK-O3
1516
; RUN: opt -disable-verify -debug-pass-manager \
1617
; RUN: -passes='lto<Os>' -S %s 2>&1 \
17-
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2
18+
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O23SZ \
19+
; RUN: --check-prefix=CHECK-OS
1820
; RUN: opt -disable-verify -debug-pass-manager \
1921
; RUN: -passes='lto<Oz>' -S %s 2>&1 \
20-
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2
22+
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O23SZ
2123
; RUN: opt -disable-verify -debug-pass-manager \
2224
; RUN: -passes='lto<O3>' -S %s -passes-ep-peephole='no-op-function' 2>&1 \
23-
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2 \
25+
; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O23SZ \
2426
; RUN: --check-prefix=CHECK-O3 --check-prefix=CHECK-EP-Peephole
2527

2628
; CHECK-O: Starting llvm::Module pass manager run.
@@ -30,18 +32,18 @@
3032
; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass
3133
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Module
3234
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
33-
; CHECK-O2-NEXT: Starting llvm::Function pass manager run.
34-
; CHECK-O2-NEXT: Running pass: CallSiteSplittingPass on foo
35-
; CHECK-O2-NEXT: Running analysis: TargetLibraryAnalysis on foo
36-
; CHECK-O2-NEXT: Running analysis: TargetIRAnalysis on foo
37-
; CHECK-O2-NEXT: Running analysis: DominatorTreeAnalysis on foo
38-
; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
39-
; CHECK-O2-NEXT: PGOIndirectCallPromotion
40-
; CHECK-O2-NEXT: Running analysis: ProfileSummaryAnalysis
41-
; CHECK-O2-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
42-
; CHECK-O2-NEXT: Running pass: IPSCCPPass
43-
; CHECK-O2-NEXT: Running analysis: AssumptionAnalysis on foo
44-
; CHECK-O2-NEXT: Running pass: CalledValuePropagationPass
35+
; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run.
36+
; CHECK-O23SZ-NEXT: Running pass: CallSiteSplittingPass on foo
37+
; CHECK-O23SZ-NEXT: Running analysis: TargetLibraryAnalysis on foo
38+
; CHECK-O23SZ-NEXT: Running analysis: TargetIRAnalysis on foo
39+
; CHECK-O23SZ-NEXT: Running analysis: DominatorTreeAnalysis on foo
40+
; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run.
41+
; CHECK-O23SZ-NEXT: PGOIndirectCallPromotion
42+
; CHECK-O23SZ-NEXT: Running analysis: ProfileSummaryAnalysis
43+
; CHECK-O23SZ-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
44+
; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass
45+
; CHECK-O23SZ-NEXT: Running analysis: AssumptionAnalysis on foo
46+
; CHECK-O23SZ-NEXT: Running pass: CalledValuePropagationPass
4547
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
4648
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
4749
; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis
@@ -60,56 +62,83 @@
6062
; CHECK-O-NEXT: Running pass: GlobalSplitPass
6163
; CHECK-O-NEXT: Running pass: WholeProgramDevirtPass
6264
; CHECK-O1-NEXT: Running pass: LowerTypeTestsPass
63-
; CHECK-O2-NEXT: Running pass: GlobalOptPass
64-
; CHECK-O2-NEXT: Running pass: PromotePass
65-
; CHECK-O2-NEXT: Running pass: ConstantMergePass
66-
; CHECK-O2-NEXT: Running pass: DeadArgumentEliminationPass
67-
; CHECK-O2-NEXT: Starting llvm::Function pass manager run.
65+
; CHECK-O23SZ-NEXT: Running pass: GlobalOptPass
66+
; CHECK-O23SZ-NEXT: Running pass: PromotePass
67+
; CHECK-O23SZ-NEXT: Running pass: ConstantMergePass
68+
; CHECK-O23SZ-NEXT: Running pass: DeadArgumentEliminationPass
69+
; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run.
6870
; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass
69-
; CHECK-O2-NEXT: Running pass: InstCombinePass
71+
; CHECK-O23SZ-NEXT: Running pass: InstCombinePass
7072
; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass
71-
; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
72-
; CHECK-O2-NEXT: Running pass: ModuleInlinerWrapperPass
73-
; CHECK-O2-NEXT: Running analysis: InlineAdvisorAnalysis
74-
; CHECK-O2-NEXT: Starting llvm::Module pass manager run.
75-
; CHECK-O2-NEXT: Starting CGSCC pass manager run.
76-
; CHECK-O2-NEXT: Running pass: InlinerPass
77-
; CHECK-O2-NEXT: Running pass: InlinerPass
78-
; CHECK-O2-NEXT: Finished CGSCC pass manager run.
79-
; CHECK-O2-NEXT: Finished llvm::Module pass manager run.
80-
; CHECK-O2-NEXT: Running pass: GlobalOptPass
81-
; CHECK-O2-NEXT: Running pass: GlobalDCEPass
82-
; CHECK-O2-NEXT: Starting llvm::Function pass manager run.
83-
; CHECK-O2-NEXT: Running pass: InstCombinePass
73+
; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run.
74+
; CHECK-O23SZ-NEXT: Running pass: ModuleInlinerWrapperPass
75+
; CHECK-O23SZ-NEXT: Running analysis: InlineAdvisorAnalysis
76+
; CHECK-O23SZ-NEXT: Starting llvm::Module pass manager run.
77+
; CHECK-O23SZ-NEXT: Starting CGSCC pass manager run.
78+
; CHECK-O23SZ-NEXT: Running pass: InlinerPass
79+
; CHECK-O23SZ-NEXT: Running pass: InlinerPass
80+
; CHECK-O23SZ-NEXT: Finished CGSCC pass manager run.
81+
; CHECK-O23SZ-NEXT: Finished llvm::Module pass manager run.
82+
; CHECK-O23SZ-NEXT: Running pass: GlobalOptPass
83+
; CHECK-O23SZ-NEXT: Running pass: GlobalDCEPass
84+
; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run.
85+
; CHECK-O23SZ-NEXT: Running pass: InstCombinePass
8486
; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass
85-
; CHECK-O2-NEXT: Running pass: JumpThreadingPass
86-
; CHECK-O2-NEXT: Running analysis: LazyValueAnalysis
87-
; CHECK-O2-NEXT: Running pass: SROA on foo
88-
; CHECK-O2-NEXT: Running pass: TailCallElimPass on foo
89-
; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
90-
; CHECK-O2-NEXT: Running pass: PostOrderFunctionAttrsPass
91-
; CHECK-O2-NEXT: Running pass: GVN on foo
92-
; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis
93-
; CHECK-O2-NEXT: Running analysis: PhiValuesAnalysis
94-
; CHECK-O2-NEXT: Running pass: MemCpyOptPass on foo
95-
; CHECK-O2-NEXT: Running pass: DSEPass on foo
96-
; CHECK-O2-NEXT: Running analysis: MemorySSAAnalysis on foo
97-
; CHECK-O2-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
98-
; CHECK-O2-NEXT: Running pass: InstCombinePass on foo
99-
; CHECK-O2-NEXT: Running pass: SimplifyCFGPass on foo
100-
; CHECK-O2-NEXT: Running pass: SCCPPass on foo
101-
; CHECK-O2-NEXT: Running pass: InstCombinePass on foo
102-
; CHECK-O2-NEXT: Running pass: BDCEPass on foo
103-
; CHECK-O2-NEXT: Running analysis: DemandedBitsAnalysis
104-
; CHECK-O2-NEXT: Running pass: InstCombinePass
105-
; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass
106-
; CHECK-O2-NEXT: Running pass: JumpThreadingPass
107-
; CHECK-O2-NEXT: Running pass: CrossDSOCFIPass
108-
; CHECK-O2-NEXT: Running pass: LowerTypeTestsPass
87+
; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
88+
; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
89+
; CHECK-O23SZ-NEXT: Running pass: SROA on foo
90+
; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass on foo
91+
; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run.
92+
; CHECK-O23SZ-NEXT: Running pass: PostOrderFunctionAttrsPass on (foo)
93+
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo
94+
; CHECK-O23SZ-NEXT: Running analysis: LoopAnalysis on foo
95+
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass on foo
96+
; CHECK-O23SZ-NEXT: Running analysis: ScalarEvolutionAnalysis on foo
97+
; CHECK-O23SZ-NEXT: Running analysis: InnerAnalysisManagerProxy
98+
; CHECK-O23SZ-NEXT: Running pass: LICMPass on Loop
99+
; CHECK-O23SZ-NEXT: Running pass: GVN on foo
100+
; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis on foo
101+
; CHECK-O23SZ-NEXT: Running analysis: PhiValuesAnalysis on foo
102+
; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass on foo
103+
; CHECK-O23SZ-NEXT: Running pass: DSEPass on foo
104+
; CHECK-O23SZ-NEXT: Running analysis: MemorySSAAnalysis on foo
105+
; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
106+
; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass on foo
107+
; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run.
108+
; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo
109+
; CHECK-O23SZ-NEXT: Running pass: LCSSAPass on foo
110+
; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run.
111+
; CHECK-O23SZ-NEXT: Starting Loop pass manager run.
112+
; CHECK-O23SZ-NEXT: Running pass: IndVarSimplifyPass on Loop
113+
; CHECK-O23SZ-NEXT: Running pass: LoopDeletionPass on Loop
114+
; CHECK-O23SZ-NEXT: Running pass: LoopFullUnrollPass on Loop
115+
; CHECK-O23SZ-NEXT: Finished Loop pass manager run.
116+
; CHECK-O23SZ-NEXT: Running pass: LoopDistributePass on foo
117+
; CHECK-O23SZ-NEXT: Running pass: LoopVectorizePass on foo
118+
; CHECK-O23SZ-NEXT: Running analysis: BlockFrequencyAnalysis on foo
119+
; CHECK-O23SZ-NEXT: Running analysis: BranchProbabilityAnalysis on foo
120+
; CHECK-O23SZ-NEXT: Running analysis: DemandedBitsAnalysis on foo
121+
; CHECK-O23SZ-NEXT: Running pass: LoopUnrollPass on foo
122+
; CHECK-O23SZ-NEXT: WarnMissedTransformationsPass on foo
123+
; CHECK-O23SZ-NEXT: Running pass: InstCombinePass on foo
124+
; CHECK-O23SZ-NEXT: Running pass: SimplifyCFGPass on foo
125+
; CHECK-O23SZ-NEXT: Running pass: SCCPPass on foo
126+
; CHECK-O23SZ-NEXT: Running pass: InstCombinePass on foo
127+
; CHECK-O23SZ-NEXT: Running pass: BDCEPass on foo
128+
; CHECK-O2-NEXT: Running pass: SLPVectorizerPass on foo
129+
; CHECK-O3-NEXT: Running pass: SLPVectorizerPass on foo
130+
; CHECK-OS-NEXT: Running pass: SLPVectorizerPass on foo
131+
; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass on foo
132+
; CHECK-O23SZ-NEXT: Running pass: AlignmentFromAssumptionsPass on foo
133+
; CHECK-O23SZ-NEXT: Running pass: InstCombinePass on foo
134+
; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass on foo
135+
; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass on foo
136+
; CHECK-O23SZ-NEXT: Running pass: CrossDSOCFIPass
137+
; CHECK-O23SZ-NEXT: Running pass: LowerTypeTestsPass
109138
; CHECK-O-NEXT: Running pass: LowerTypeTestsPass
110-
; CHECK-O2-NEXT: Running pass: SimplifyCFGPass
111-
; CHECK-O2-NEXT: Running pass: EliminateAvailableExternallyPass
112-
; CHECK-O2-NEXT: Running pass: GlobalDCEPass
139+
; CHECK-O23SZ-NEXT: Running pass: SimplifyCFGPass
140+
; CHECK-O23SZ-NEXT: Running pass: EliminateAvailableExternallyPass
141+
; CHECK-O23SZ-NEXT: Running pass: GlobalDCEPass
113142
; CHECK-O-NEXT: Running pass: AnnotationRemarksPass on foo
114143
; CHECK-O-NEXT: Running pass: PrintModulePass
115144

0 commit comments

Comments
 (0)