Skip to content

Commit 7db87a6

Browse files
committed
[SLP] Fix PR66795: Check correct deps for vectorized inst with multiple
vectorized node uses. If an instruction is vectorized in several different vector nodes, it may break the dependency analysis for gather nodes with matching scalars. The dependency between such gather nodes must be checked properly to avoid a cyclic dependency.
1 parent 816144b commit 7db87a6

File tree

2 files changed

+39
-0
lines changed

2 files changed

+39
-0
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8939,6 +8939,12 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
89398939
TE->UserTreeIndices.front().EdgeIdx <
89408940
TEPtr->UserTreeIndices.front().EdgeIdx)
89418941
continue;
8942+
// If the user instruction is, for some reason, used in different
8943+
// vectorized nodes, make the dependency depend on the node index.
8944+
if (TE->UserTreeIndices.front().UserTE !=
8945+
TEPtr->UserTreeIndices.front().UserTE &&
8946+
TE->Idx > TEPtr->Idx)
8947+
continue;
89428948
}
89438949
// Check if the user node of the TE comes after user node of EntryPtr,
89448950
// otherwise EntryPtr depends on TE.
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2+
; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define void @test(double %0) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: double [[TMP0:%.*]]) {
7+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 0
8+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> zeroinitializer
9+
; CHECK-NEXT: br label [[TMP4:%.*]]
10+
; CHECK: 4:
11+
; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> zeroinitializer, [[TMP3]]
12+
; CHECK-NEXT: [[TMP6:%.*]] = fsub <2 x double> zeroinitializer, [[TMP3]]
13+
; CHECK-NEXT: br label [[DOTBACKEDGE:%.*]]
14+
; CHECK: .backedge:
15+
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP5]], [[TMP6]]
16+
; CHECK-NEXT: [[TMP8:%.*]] = fcmp olt <2 x double> [[TMP7]], zeroinitializer
17+
; CHECK-NEXT: br label [[TMP4]]
18+
;
19+
br label %2
20+
21+
2:
22+
%3 = fsub double 0.000000e+00, %0
23+
%4 = fsub double 0.000000e+00, %0
24+
%5 = fsub double 0.000000e+00, %0
25+
br label %.backedge
26+
27+
.backedge:
28+
%6 = fmul double %4, %5
29+
%7 = fcmp olt double %6, 0.000000e+00
30+
%8 = fmul double %5, %3
31+
%9 = fcmp olt double %8, 0.000000e+00
32+
br label %2
33+
}

0 commit comments

Comments
 (0)