Skip to content

Commit 4a1d63d

Browse files
committed
[VectorCombine] Add option to only run scalarization transforms.
This patch adds a pass option to only run transforms that scalarize vector operations and do not create new vector instructions. When VectorCombine runs early in the pipeline, introducing new vector operations can have negative effects, such as blocking loop or SLP vectorization. To avoid regressions, restrict the early VectorCombine run (when using -enable-matrix) to only perform scalarization and not introduce new vector operations. This is implemented as an option on the pass itself, which is set when the pass is added to the pipeline. This is done for the new pass manager only. Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D111800
1 parent 6970847 commit 4a1d63d

File tree

4 files changed

+36
-23
lines changed

4 files changed

+36
-23
lines changed

llvm/include/llvm/Transforms/Vectorize/VectorCombine.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,16 @@
2020
namespace llvm {
2121

2222
/// Optimize scalar/vector interactions in IR using target cost models.
23-
struct VectorCombinePass : public PassInfoMixin<VectorCombinePass> {
23+
class VectorCombinePass : public PassInfoMixin<VectorCombinePass> {
24+
/// If true only perform scalarization combines and do not introduce new
25+
/// vector operations.
26+
bool ScalarizationOnly;
27+
2428
public:
29+
VectorCombinePass(bool ScalarizationOnly = false)
30+
: ScalarizationOnly(ScalarizationOnly) {}
31+
2532
PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
2633
};
27-
2834
}
2935
#endif // LLVM_TRANSFORMS_VECTORIZE_VECTORCOMBINE_H

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
503503
// The matrix extension can introduce large vector operations early, which can
504504
// benefit from running vector-combine early on.
505505
if (EnableMatrix)
506-
FPM.addPass(VectorCombinePass());
506+
FPM.addPass(VectorCombinePass(/*ScalarizationOnly=*/true));
507507

508508
// Eliminate redundancies.
509509
FPM.addPass(MergedLoadStoreMotionPass());

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,10 @@ namespace {
6363
class VectorCombine {
6464
public:
6565
VectorCombine(Function &F, const TargetTransformInfo &TTI,
66-
const DominatorTree &DT, AAResults &AA, AssumptionCache &AC)
67-
: F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC) {}
66+
const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
67+
bool ScalarizationOnly)
68+
: F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC),
69+
ScalarizationOnly(ScalarizationOnly) {}
6870

6971
bool run();
7072

@@ -75,6 +77,11 @@ class VectorCombine {
7577
const DominatorTree &DT;
7678
AAResults &AA;
7779
AssumptionCache &AC;
80+
81+
/// If true only perform scalarization combines and do not introduce new
82+
/// vector operations.
83+
bool ScalarizationOnly;
84+
7885
InstructionWorklist Worklist;
7986

8087
bool vectorizeLoadInsert(Instruction &I);
@@ -1071,11 +1078,13 @@ bool VectorCombine::run() {
10711078
bool MadeChange = false;
10721079
auto FoldInst = [this, &MadeChange](Instruction &I) {
10731080
Builder.SetInsertPoint(&I);
1074-
MadeChange |= vectorizeLoadInsert(I);
1075-
MadeChange |= foldExtractExtract(I);
1076-
MadeChange |= foldBitcastShuf(I);
1081+
if (!ScalarizationOnly) {
1082+
MadeChange |= vectorizeLoadInsert(I);
1083+
MadeChange |= foldExtractExtract(I);
1084+
MadeChange |= foldBitcastShuf(I);
1085+
MadeChange |= foldExtractedCmps(I);
1086+
}
10771087
MadeChange |= scalarizeBinopOrCmp(I);
1078-
MadeChange |= foldExtractedCmps(I);
10791088
MadeChange |= scalarizeLoadExtract(I);
10801089
MadeChange |= foldSingleElementStore(I);
10811090
};
@@ -1137,7 +1146,7 @@ class VectorCombineLegacyPass : public FunctionPass {
11371146
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
11381147
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
11391148
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1140-
VectorCombine Combiner(F, TTI, DT, AA, AC);
1149+
VectorCombine Combiner(F, TTI, DT, AA, AC, false);
11411150
return Combiner.run();
11421151
}
11431152
};
@@ -1161,7 +1170,7 @@ PreservedAnalyses VectorCombinePass::run(Function &F,
11611170
TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
11621171
DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
11631172
AAResults &AA = FAM.getResult<AAManager>(F);
1164-
VectorCombine Combiner(F, TTI, DT, AA, AC);
1173+
VectorCombine Combiner(F, TTI, DT, AA, AC, ScalarizationOnly);
11651174
if (!Combiner.run())
11661175
return PreservedAnalyses::all();
11671176
PreservedAnalyses PA;

llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -308,18 +308,16 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
308308

309309
define <4 x float> @reverse_hadd_v4f32(<4 x float> %a, <4 x float> %b) {
310310
; CHECK-LABEL: @reverse_hadd_v4f32(
311-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
312-
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
313-
; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
314-
; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[SHIFT1]], [[A]]
315-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 undef, i32 undef, i32 6, i32 0>
316-
; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
317-
; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
318-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x i32> <i32 undef, i32 4, i32 2, i32 3>
319-
; CHECK-NEXT: [[SHIFT3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
320-
; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[SHIFT3]], [[B]]
321-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> <i32 6, i32 1, i32 2, i32 3>
322-
; CHECK-NEXT: ret <4 x float> [[TMP7]]
311+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <2 x i32> <i32 2, i32 0>
312+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> undef, <2 x i32> <i32 3, i32 1>
313+
; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]]
314+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
315+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <2 x i32> <i32 2, i32 0>
316+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <2 x i32> <i32 3, i32 1>
317+
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x float> [[TMP5]], [[TMP6]]
318+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
319+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
320+
; CHECK-NEXT: ret <4 x float> [[TMP9]]
323321
;
324322
%vecext = extractelement <4 x float> %a, i32 0
325323
%vecext1 = extractelement <4 x float> %a, i32 1

0 commit comments

Comments
 (0)