Skip to content

Commit 8d89dd4

Browse files
committed
[SLP] Fix PR79743: Check that all users are demoted before trying to
demote the tree entry. All user nodes must be marked for demotion before the node itself is demoted; otherwise, some data might be lost after vectorization.
1 parent eb98b50 commit 8d89dd4

File tree

2 files changed

+24
-14
lines changed

2 files changed

+24
-14
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13323,6 +13323,20 @@ void BoUpSLP::computeMinimumValueSizes() {
1332313323
Visited);
1332413324
}
1332513325

13326+
// Check that all users are marked for demotion.
13327+
DenseSet<Value *> Demoted(ToDemote.begin(), ToDemote.end());
13328+
DenseSet<const TreeEntry *> Visited;
13329+
for (Value *V: ToDemote) {
13330+
const TreeEntry *TE = getTreeEntry(V);
13331+
assert(TE && "Expected vectorized scalar.");
13332+
if (!Visited.insert(TE).second)
13333+
continue;
13334+
if (!all_of(TE->UserTreeIndices, [&](const EdgeInfo &EI) {
13335+
return all_of(EI.UserTE->Scalars,
13336+
[&](Value *V) { return Demoted.contains(V); });
13337+
}))
13338+
return;
13339+
}
1332613340
// Finally, map the values we can demote to the maximum bit width we computed.
1332713341
for (auto *Scalar : ToDemote) {
1332813342
auto *TE = getTreeEntry(Scalar);

llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,16 @@ define void @test() {
1010
; CHECK-NEXT: [[TMP3:%.*]] = select i1 false, i32 0, i32 0
1111
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> <i8 poison, i8 0, i8 poison, i8 poison>, i8 [[TMP1]], i32 0
1212
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
13-
; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i8> [[TMP5]] to <4 x i1>
14-
; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i1> [[TMP6]] to <4 x i32>
15-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> zeroinitializer
16-
; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i8> [[TMP8]], zeroinitializer
17-
; CHECK-NEXT: [[TMP10:%.*]] = sext <4 x i8> [[TMP9]] to <4 x i32>
18-
; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i1> [[TMP6]] to <4 x i32>
19-
; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i32> zeroinitializer, [[TMP11]]
20-
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <4 x i32> [[TMP10]], [[TMP12]]
21-
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, <4 x i32> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
22-
; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> zeroinitializer
23-
; CHECK-NEXT: [[TMP16:%.*]] = trunc <4 x i32> [[TMP15]] to <4 x i1>
24-
; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP16]])
25-
; CHECK-NEXT: [[TMP18:%.*]] = zext i1 [[TMP17]] to i32
26-
; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 0, [[TMP18]]
13+
; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i8> [[TMP5]] to <4 x i32>
14+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> zeroinitializer
15+
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i8> [[TMP7]], zeroinitializer
16+
; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i8> [[TMP8]] to <4 x i32>
17+
; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i32> zeroinitializer, [[TMP6]]
18+
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i32> [[TMP9]], [[TMP10]]
19+
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
20+
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> zeroinitializer
21+
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP13]])
22+
; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 0, [[TMP14]]
2723
; CHECK-NEXT: store i32 [[OP_RDX]], ptr null, align 4
2824
; CHECK-NEXT: ret void
2925
;

0 commit comments

Comments
 (0)