Skip to content

Commit 7705342

Browse files
committed
[SLP] Do not gather a node if its instruction, which does not require
scheduling, was previously vectorized. If the main node was already vectorized but does not require scheduling, we can still try to vectorize it in this new node instead of gathering.
1 parent 1aadd47 commit 7705342

File tree

2 files changed

+17
-15
lines changed

2 files changed

+17
-15
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5811,18 +5811,21 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
58115811
if (TreeEntry *E = getTreeEntry(S.OpValue)) {
58125812
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
58135813
if (!E->isSame(VL)) {
5814-
LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
5815-
if (TryToFindDuplicates(S))
5816-
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
5817-
ReuseShuffleIndicies);
5814+
if (!doesNotNeedToBeScheduled(S.OpValue)) {
5815+
LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
5816+
if (TryToFindDuplicates(S))
5817+
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
5818+
ReuseShuffleIndicies);
5819+
return;
5820+
}
5821+
} else {
5822+
// Record the reuse of the tree node. FIXME, currently this is only used
5823+
// to properly draw the graph rather than for the actual vectorization.
5824+
E->UserTreeIndices.push_back(UserTreeIdx);
5825+
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
5826+
<< ".\n");
58185827
return;
58195828
}
5820-
// Record the reuse of the tree node. FIXME, currently this is only used to
5821-
// properly draw the graph rather than for the actual vectorization.
5822-
E->UserTreeIndices.push_back(UserTreeIdx);
5823-
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
5824-
<< ".\n");
5825-
return;
58265829
}
58275830

58285831
// Check that none of the instructions in the bundle are already in the tree.

llvm/test/Transforms/SLPVectorizer/X86/multi-node-vectorized-insts.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,13 +83,12 @@ define void @test2(double %0) {
8383
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
8484
; CHECK-NEXT: br label [[TMP4:%.*]]
8585
; CHECK: 4:
86-
; CHECK-NEXT: [[TMP5:%.*]] = fsub double 1.000000e+00, [[TMP0]]
87-
; CHECK-NEXT: [[TMP6:%.*]] = fsub <2 x double> <double 3.000000e+00, double 2.000000e+00>, [[TMP3]]
86+
; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> <double 3.000000e+00, double 2.000000e+00>, [[TMP3]]
87+
; CHECK-NEXT: [[TMP6:%.*]] = fsub <2 x double> <double 3.000000e+00, double 1.000000e+00>, [[TMP3]]
8888
; CHECK-NEXT: br label [[DOTBACKEDGE:%.*]]
8989
; CHECK: .backedge:
90-
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP5]], i32 1
91-
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP6]], [[TMP7]]
92-
; CHECK-NEXT: [[TMP9:%.*]] = fcmp olt <2 x double> [[TMP8]], zeroinitializer
90+
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP5]], [[TMP6]]
91+
; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x double> [[TMP7]], zeroinitializer
9392
; CHECK-NEXT: br label [[TMP4]]
9493
;
9594
br label %2

0 commit comments

Comments
 (0)