Skip to content

Commit ea7f43e

Browse files
committed
[SLP] Do not gather a node if its instruction, which does not require
scheduling, was previously vectorized. If the main node was already vectorized but does not require scheduling, we can still try to vectorize it in this new node instead of gathering it.
1 parent 1a7e8b9 commit ea7f43e

File tree

2 files changed

+36
-18
lines changed

2 files changed

+36
-18
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1326,6 +1326,9 @@ class BoUpSLP {
13261326
}
13271327
LLVM_DUMP_METHOD void dump() const { dump(dbgs()); }
13281328
#endif
1329+
bool operator == (const EdgeInfo &Other) const {
1330+
return UserTE == Other.UserTE && EdgeIdx == Other.EdgeIdx;
1331+
}
13291332
};
13301333

13311334
/// A helper class used for scoring candidates for two consecutive lanes.
@@ -2412,12 +2415,25 @@ class BoUpSLP {
24122415
TreeEntry *getVectorizedOperand(TreeEntry *UserTE, unsigned OpIdx) {
24132416
ArrayRef<Value *> VL = UserTE->getOperand(OpIdx);
24142417
TreeEntry *TE = nullptr;
2415-
const auto *It = find_if(VL, [this, &TE](Value *V) {
2418+
const auto *It = find_if(VL, [&](Value *V) {
24162419
TE = getTreeEntry(V);
2417-
return TE;
2420+
if (TE && is_contained(TE->UserTreeIndices, EdgeInfo(UserTE, OpIdx)))
2421+
return true;
2422+
auto It = MultiNodeScalars.find(V);
2423+
if (It != MultiNodeScalars.end()) {
2424+
for (TreeEntry *E : It->second) {
2425+
if (is_contained(E->UserTreeIndices, EdgeInfo(UserTE, OpIdx))) {
2426+
TE = E;
2427+
return true;
2428+
}
2429+
}
2430+
}
2431+
return false;
24182432
});
2419-
if (It != VL.end() && TE->isSame(VL))
2433+
if (It != VL.end()) {
2434+
assert(TE->isSame(VL) && "Expedted same scalars.");
24202435
return TE;
2436+
}
24212437
return nullptr;
24222438
}
24232439

@@ -5806,18 +5822,21 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
58065822
if (TreeEntry *E = getTreeEntry(S.OpValue)) {
58075823
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
58085824
if (!E->isSame(VL)) {
5809-
LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
5810-
if (TryToFindDuplicates(S))
5811-
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
5812-
ReuseShuffleIndicies);
5825+
if (!doesNotNeedToBeScheduled(S.OpValue)) {
5826+
LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
5827+
if (TryToFindDuplicates(S))
5828+
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
5829+
ReuseShuffleIndicies);
5830+
return;
5831+
}
5832+
} else {
5833+
// Record the reuse of the tree node. FIXME, currently this is only used
5834+
// to properly draw the graph rather than for the actual vectorization.
5835+
E->UserTreeIndices.push_back(UserTreeIdx);
5836+
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
5837+
<< ".\n");
58135838
return;
58145839
}
5815-
// Record the reuse of the tree node. FIXME, currently this is only used to
5816-
// properly draw the graph rather than for the actual vectorization.
5817-
E->UserTreeIndices.push_back(UserTreeIdx);
5818-
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
5819-
<< ".\n");
5820-
return;
58215840
}
58225841

58235842
// Check that none of the instructions in the bundle are already in the tree.

llvm/test/Transforms/SLPVectorizer/X86/multi-node-vectorized-insts.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,13 +83,12 @@ define void @test2(double %0) {
8383
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
8484
; CHECK-NEXT: br label [[TMP4:%.*]]
8585
; CHECK: 4:
86-
; CHECK-NEXT: [[TMP5:%.*]] = fsub double 1.000000e+00, [[TMP0]]
87-
; CHECK-NEXT: [[TMP6:%.*]] = fsub <2 x double> <double 3.000000e+00, double 2.000000e+00>, [[TMP3]]
86+
; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> <double 3.000000e+00, double 2.000000e+00>, [[TMP3]]
87+
; CHECK-NEXT: [[TMP6:%.*]] = fsub <2 x double> <double 3.000000e+00, double 1.000000e+00>, [[TMP3]]
8888
; CHECK-NEXT: br label [[DOTBACKEDGE:%.*]]
8989
; CHECK: .backedge:
90-
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP5]], i32 1
91-
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP6]], [[TMP7]]
92-
; CHECK-NEXT: [[TMP9:%.*]] = fcmp olt <2 x double> [[TMP8]], zeroinitializer
90+
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP5]], [[TMP6]]
91+
; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x double> [[TMP7]], zeroinitializer
9392
; CHECK-NEXT: br label [[TMP4]]
9493
;
9594
br label %2

0 commit comments

Comments
 (0)