Skip to content

Commit acea6e9

Browse files
committed
[Passes] Only run extra vector passes if loops have been vectorized.
This patch uses a trick similar to the one in D113947 to only run the extra passes after vectorization on functions where loops have been vectorized. The reason for running the 'extra vector passes' is simplification/unswitching of the runtime checks created by LV; there should be no need to run them if nothing got vectorized. To do that, a new dummy analysis ShouldRunExtraVectorPasses has been added. If loops have been vectorized for a function, LV will cache the analysis. At the moment it uses MadeCFGChange as a proxy for "a loop was vectorized", which isn't perfect (it could be too aggressive, e.g. because no runtime checks have been added), but should be good enough for now. The extra passes are now managed by a new FunctionPassManager that runs its passes only if ShouldRunExtraVectorPasses has been cached. Without this patch, `-extra-vectorizer-passes` has the following compile-time impact: NewPM-O3: +4.86%, NewPM-ReleaseThinLTO: +3.56%, NewPM-ReleaseLTO-g: +7.17% http://llvm-compile-time-tracker.com/compare.php?from=ead3979a92fc33add4710c4510d6906260dcb4ad&to=c292da649e2c6e88a31e702fdc474727d09c72bc&stat=instructions With this patch, that gets reduced to NewPM-O3: +1.43%, NewPM-ReleaseThinLTO: +1.00%, NewPM-ReleaseLTO-g: +1.58% http://llvm-compile-time-tracker.com/compare.php?from=ead3979a92fc33add4710c4510d6906260dcb4ad&to=e67d86b57810011cf285eb9aa1944781be6096f0&stat=instructions It is probably still too high to enable by default, but much better. Reviewed By: aeubanks Differential Revision: https://reviews.llvm.org/D115052
1 parent eef8f3f commit acea6e9

File tree

5 files changed

+57
-8
lines changed

5 files changed

+57
-8
lines changed

llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,38 @@ class TargetTransformInfo;
8080
extern cl::opt<bool> EnableLoopInterleaving;
8181
extern cl::opt<bool> EnableLoopVectorization;
8282

83+
/// A marker to determine if extra passes after loop vectorization should be
84+
/// run.
85+
struct ShouldRunExtraVectorPasses
86+
: public AnalysisInfoMixin<ShouldRunExtraVectorPasses> {
87+
static AnalysisKey Key;
88+
struct Result {
89+
bool invalidate(Function &F, const PreservedAnalyses &PA,
90+
FunctionAnalysisManager::Invalidator &) {
91+
// Check whether the analysis has been explicitly invalidated. Otherwise,
92+
// it remains preserved.
93+
auto PAC = PA.getChecker<ShouldRunExtraVectorPasses>();
94+
return !PAC.preservedWhenStateless();
95+
}
96+
};
97+
98+
Result run(Function &F, FunctionAnalysisManager &FAM) { return Result(); }
99+
};
100+
101+
/// A pass manager to run a set of extra function simplification passes after
102+
/// vectorization, if requested. LoopVectorize caches the
103+
/// ShouldRunExtraVectorPasses analysis to request extra simplifications, if
104+
/// they could be beneficial.
105+
struct ExtraVectorPassManager : public FunctionPassManager {
106+
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
107+
auto PA = PreservedAnalyses::all();
108+
if (AM.getCachedResult<ShouldRunExtraVectorPasses>(F))
109+
PA.intersect(FunctionPassManager::run(F, AM));
110+
PA.abandon<ShouldRunExtraVectorPasses>();
111+
return PA;
112+
}
113+
};
114+
83115
struct LoopVectorizeOptions {
84116
/// If false, consider all loops for interleaving.
85117
/// If true, only loops that explicitly request interleaving are considered.

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -995,26 +995,28 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
995995
FPM.addPass(InstCombinePass());
996996

997997
if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
998+
ExtraVectorPassManager ExtraPasses;
998999
// At higher optimization levels, try to clean up any runtime overlap and
9991000
// alignment checks inserted by the vectorizer. We want to track correlated
10001001
// runtime checks for two inner loops in the same outer loop, fold any
10011002
// common computations, hoist loop-invariant aspects out of any outer loop,
10021003
// and unswitch the runtime checks if possible. Once hoisted, we may have
10031004
// dead (or speculatable) control flows or more combining opportunities.
1004-
FPM.addPass(EarlyCSEPass());
1005-
FPM.addPass(CorrelatedValuePropagationPass());
1006-
FPM.addPass(InstCombinePass());
1005+
ExtraPasses.addPass(EarlyCSEPass());
1006+
ExtraPasses.addPass(CorrelatedValuePropagationPass());
1007+
ExtraPasses.addPass(InstCombinePass());
10071008
LoopPassManager LPM;
10081009
LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
10091010
LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
10101011
OptimizationLevel::O3));
1011-
FPM.addPass(
1012+
ExtraPasses.addPass(
10121013
RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
1013-
FPM.addPass(
1014+
ExtraPasses.addPass(
10141015
createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
10151016
/*UseBlockFrequencyInfo=*/true));
1016-
FPM.addPass(SimplifyCFGPass());
1017-
FPM.addPass(InstCombinePass());
1017+
ExtraPasses.addPass(SimplifyCFGPass());
1018+
ExtraPasses.addPass(InstCombinePass());
1019+
FPM.addPass(std::move(ExtraPasses));
10181020
}
10191021

10201022
// Now that we've formed fast to execute loop structures, we do further

llvm/lib/Passes/PassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis())
203203
FUNCTION_ANALYSIS("opt-remark-emit", OptimizationRemarkEmitterAnalysis())
204204
FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis())
205205
FUNCTION_ANALYSIS("should-not-run-function-passes", ShouldNotRunFunctionPassesAnalysis())
206+
FUNCTION_ANALYSIS("should-run-extra-vector-passes", ShouldRunExtraVectorPasses())
206207
FUNCTION_ANALYSIS("stack-safety-local", StackSafetyAnalysis())
207208
FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
208209
FUNCTION_ANALYSIS("targetir",

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,8 @@ class GeneratedRTChecks;
428428

429429
namespace llvm {
430430

431+
AnalysisKey ShouldRunExtraVectorPasses::Key;
432+
431433
/// InnerLoopVectorizer vectorizes loops which contain only one basic
432434
/// block to a specified vectorization factor (VF).
433435
/// This class performs the widening of scalars into vectors, or multiple
@@ -10746,8 +10748,17 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
1074610748
PA.preserve<LoopAnalysis>();
1074710749
PA.preserve<DominatorTreeAnalysis>();
1074810750
}
10749-
if (!Result.MadeCFGChange)
10751+
10752+
if (Result.MadeCFGChange) {
10753+
// Making CFG changes likely means a loop got vectorized. Indicate that
10754+
// extra simplification passes should be run.
10755+
// TODO: MadeCFGChange is not a perfect proxy. Extra passes should only
10756+
// be run if runtime checks have been added.
10757+
AM.getResult<ShouldRunExtraVectorPasses>(F);
10758+
PA.preserve<ShouldRunExtraVectorPasses>();
10759+
} else {
1075010760
PA.preserveSet<CFGAnalyses>();
10761+
}
1075110762
return PA;
1075210763
}
1075310764

llvm/test/Other/opt-pipeline-vector-passes.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
; RUN: opt -disable-verify -debug-pass-manager -passes='default<O2>' -force-vector-width=4 -S %s 2>&1 | FileCheck %s --check-prefixes=O2
33
; RUN: opt -disable-verify -debug-pass-manager -passes='default<O2>' -force-vector-width=4 -extra-vectorizer-passes -S %s 2>&1 | FileCheck %s --check-prefixes=O2_EXTRA
44

5+
; When the loop doesn't get vectorized, no extra vector passes should run.
6+
; RUN: opt -disable-verify -debug-pass-manager -passes='default<O2>' -force-vector-width=0 -extra-vectorizer-passes -S %s 2>&1 | FileCheck %s --check-prefixes=O2
7+
58
; REQUIRES: asserts
69

710
; The loop vectorizer still runs at both -O1/-O2 even with the

0 commit comments

Comments
 (0)