Skip to content

Commit 040b5a1

Browse files
committed
[SLP]Fix PR90211: vectorized node must match completely to be reused.
If the gather node matches the vectorized node, it must also match the scalars completely. Otherwise, the gather node needs to be revectorized to generate correct code.
1 parent 86b9a4f commit 040b5a1

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13166,7 +13166,8 @@ Value *BoUpSLP::vectorizeTree(
1316613166
auto *TE = const_cast<TreeEntry *>(E);
1316713167
if (auto *VecTE = getTreeEntry(TE->Scalars.front()))
1316813168
if (VecTE->isSame(TE->UserTreeIndices.front().UserTE->getOperand(
13169-
TE->UserTreeIndices.front().EdgeIdx)))
13169+
TE->UserTreeIndices.front().EdgeIdx)) &&
13170+
VecTE->isSame(TE->Scalars))
1317013171
// Found gather node which is absolutely the same as one of the
1317113172
// vectorized nodes. It may happen after reordering.
1317213173
continue;

llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,7 @@ define void @foo(ptr %i7, i32 %0, i1 %tobool62.not) {
2121
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x i32> <i32 0, i32 4, i32 5, i32 poison>
2222
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i32 3
2323
; CHECK-NEXT: [[TMP12:%.*]] = fcmp olt <4 x float> [[TMP11]], zeroinitializer
24-
; CHECK-NEXT: [[TMP13:%.*]] = load <4 x float>, ptr poison, align 16
25-
; CHECK-NEXT: [[TMP14:%.*]] = fcmp olt <4 x float> [[TMP13]], zeroinitializer
24+
; CHECK-NEXT: [[TMP14:%.*]] = fcmp olt <4 x float> [[TMP5]], zeroinitializer
2625
; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP14]], <4 x float> [[TMP5]], <4 x float> zeroinitializer
2726
; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP12]], <4 x float> zeroinitializer, <4 x float> [[TMP15]]
2827
; CHECK-NEXT: store <4 x float> [[TMP16]], ptr [[RC21]], align 4

0 commit comments

Comments
 (0)