Skip to content

Commit 843ef77

Browse files
committed
[SLP]Update mapping between values and their matching entries upon selection
The mapping between gathered values and their matching tree entries must be updated when the list of entries changes and only some of them are selected for the final shuffle. Fixes #134085.
1 parent f475ccd commit 843ef77

File tree

2 files changed

+132
-1
lines changed

2 files changed

+132
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14335,7 +14335,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1433514335
// single vector. If we have 2 different sets, we're in situation where we
1433614336
// have a permutation of 2 input vectors.
1433714337
SmallVector<SmallPtrSet<const TreeEntry *, 4>> UsedTEs;
14338-
DenseMap<Value *, int> UsedValuesEntry;
14338+
SmallDenseMap<Value *, int> UsedValuesEntry;
1433914339
SmallPtrSet<const Value *, 16> VisitedValue;
1434014340
auto CheckAndUseSameNode = [&](const TreeEntry *TEPtr) {
1434114341
// The node is reused - exit.
@@ -14552,6 +14552,8 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1455214552
// No perfect match, just shuffle, so choose the first tree node from the
1455314553
// tree.
1455414554
Entries.push_back(FirstEntries.front());
14555+
// Update mapping between values and corresponding tree entries.
14556+
for_each(UsedValuesEntry, [&](auto &P) { P.second = 0; });
1455514557
VF = FirstEntries.front()->getVectorFactor();
1455614558
} else {
1455714559
// Try to find nodes with the same vector factor.
@@ -14596,6 +14598,18 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1459614598
} else {
1459714599
VF = Entries.front()->getVectorFactor();
1459814600
}
14601+
SmallVector<SmallPtrSet<Value *, 8>> ValuesToEntries;
14602+
for (const TreeEntry *E : Entries)
14603+
ValuesToEntries.emplace_back().insert(E->Scalars.begin(),
14604+
E->Scalars.end());
14605+
// Update mapping between values and corresponding tree entries.
14606+
for_each(UsedValuesEntry, [&](auto &P) {
14607+
for (unsigned Idx : seq<unsigned>(ValuesToEntries.size()))
14608+
if (ValuesToEntries[Idx].contains(P.first)) {
14609+
P.second = Idx;
14610+
break;
14611+
}
14612+
});
1459914613
}
1460014614

1460114615
bool IsSplatOrUndefs = isSplat(VL) || all_of(VL, IsaPred<UndefValue>);
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-9999 < %s | FileCheck %s
3+
4+
define i32 @test(i32 %s.0) {
5+
; CHECK-LABEL: define i32 @test(
6+
; CHECK-SAME: i32 [[S_0:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*]]:
8+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 poison, i32 0>, i32 [[S_0]], i32 2
9+
; CHECK-NEXT: br label %[[K:.*]]
10+
; CHECK: [[K]]:
11+
; CHECK-NEXT: [[DOTPRE:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP17:%.*]], %[[T:.*]] ]
12+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[DOTPRE]], i32 1
13+
; CHECK-NEXT: br label %[[O:.*]]
14+
; CHECK: [[IF_END3:.*]]:
15+
; CHECK-NEXT: br label %[[IF_END6:.*]]
16+
; CHECK: [[IF_END6]]:
17+
; CHECK-NEXT: [[J_4:%.*]] = phi i32 [ 0, %[[IF_END3]] ], [ [[TMP28:%.*]], %[[O]] ]
18+
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x i32> [ poison, %[[IF_END3]] ], [ zeroinitializer, %[[O]] ]
19+
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ poison, %[[IF_END3]] ], [ zeroinitializer, %[[O]] ]
20+
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i32> [ poison, %[[IF_END3]] ], [ [[TMP22:%.*]], %[[O]] ]
21+
; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], zeroinitializer
22+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP22]], <2 x i32> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
23+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP22]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
24+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
25+
; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP8]], <2 x i32> [[TMP2]], i64 2)
26+
; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP9]], <2 x i32> [[TMP3]], i64 4)
27+
; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP10]], <2 x i32> [[TMP5]], i64 6)
28+
; CHECK-NEXT: br i1 false, label %[[IF_END24:.*]], label %[[IF_THEN11:.*]]
29+
; CHECK: [[IF_THEN11]]:
30+
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
31+
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
32+
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> <i32 0, i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison>, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 poison>
33+
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i32> [[TMP14]], i32 [[J_4]], i32 7
34+
; CHECK-NEXT: br label %[[IF_END24]]
35+
; CHECK: [[IF_THEN18:.*]]:
36+
; CHECK-NEXT: br label %[[T]]
37+
; CHECK: [[T]]:
38+
; CHECK-NEXT: [[TMP16:%.*]] = phi <8 x i32> [ [[TMP27:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ]
39+
; CHECK-NEXT: [[TMP17]] = extractelement <4 x i32> [[TMP23:%.*]], i32 0
40+
; CHECK-NEXT: br i1 false, label %[[IF_END24]], label %[[K]]
41+
; CHECK: [[IF_END24]]:
42+
; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP15]], %[[IF_THEN11]] ], [ [[TMP11]], %[[IF_END6]] ], [ [[TMP16]], %[[T]] ]
43+
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <2 x i32> <i32 7, i32 1>
44+
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
45+
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 6>
46+
; CHECK-NEXT: br label %[[O]]
47+
; CHECK: [[O]]:
48+
; CHECK-NEXT: [[TMP22]] = phi <2 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP19]], %[[IF_END24]] ]
49+
; CHECK-NEXT: [[TMP23]] = phi <4 x i32> [ [[TMP1]], %[[K]] ], [ [[TMP20]], %[[IF_END24]] ]
50+
; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP21]], %[[IF_END24]] ]
51+
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison>
52+
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <8 x i32> [[TMP25]], <8 x i32> <i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
53+
; CHECK-NEXT: [[TMP27]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP26]], <4 x i32> [[TMP23]], i64 4)
54+
; CHECK-NEXT: [[TMP28]] = extractelement <4 x i32> [[TMP24]], i32 3
55+
; CHECK-NEXT: br i1 false, label %[[T]], label %[[IF_END6]]
56+
;
57+
entry:
58+
br label %k
59+
60+
k:
61+
%.pre = phi i32 [ 0, %entry ], [ %.pre4053, %t ]
62+
br label %o
63+
64+
if.end3:
65+
br label %if.end6
66+
67+
if.end6:
68+
%.pre43 = phi i32 [ 0, %if.end3 ], [ 0, %o ]
69+
%0 = phi i32 [ 0, %if.end3 ], [ 0, %o ]
70+
%1 = phi i32 [ 0, %if.end3 ], [ 0, %o ]
71+
%h.4 = phi i32 [ 0, %if.end3 ], [ %h.6, %o ]
72+
%i.4 = phi i32 [ 0, %if.end3 ], [ %i.7, %o ]
73+
%j.4 = phi i32 [ 0, %if.end3 ], [ %j.5, %o ]
74+
%l.4 = phi i32 [ 0, %if.end3 ], [ 0, %o ]
75+
%not8 = xor i32 %i.4, 0
76+
%not9 = xor i32 %h.4, 0
77+
br i1 false, label %if.end24, label %if.then11
78+
79+
if.then11:
80+
br label %if.end24
81+
82+
if.then18:
83+
br label %t
84+
85+
t:
86+
%.pre40 = phi i32 [ %.pre4053, %o ], [ 0, %if.then18 ]
87+
%i.5 = phi i32 [ 0, %o ], [ 0, %if.then18 ]
88+
%l.5 = phi i32 [ %l.7, %o ], [ 0, %if.then18 ]
89+
%r.2 = phi i32 [ %r.4, %o ], [ 0, %if.then18 ]
90+
%s.2 = phi i32 [ %s.4, %o ], [ 0, %if.then18 ]
91+
br i1 false, label %if.end24, label %k
92+
93+
if.end24:
94+
%.pre4050 = phi i32 [ 0, %if.then11 ], [ 0, %if.end6 ], [ %.pre40, %t ]
95+
%.pre42 = phi i32 [ 0, %if.then11 ], [ %.pre43, %if.end6 ], [ %.pre40, %t ]
96+
%2 = phi i32 [ 0, %if.then11 ], [ %0, %if.end6 ], [ %.pre40, %t ]
97+
%3 = phi i32 [ 0, %if.then11 ], [ %1, %if.end6 ], [ %.pre40, %t ]
98+
%i.6 = phi i32 [ %i.4, %if.then11 ], [ %i.7, %if.end6 ], [ %i.5, %t ]
99+
%l.6 = phi i32 [ 0, %if.then11 ], [ %l.4, %if.end6 ], [ %l.5, %t ]
100+
%r.3 = phi i32 [ %j.4, %if.then11 ], [ %not8, %if.end6 ], [ %r.2, %t ]
101+
%s.3 = phi i32 [ 0, %if.then11 ], [ %not9, %if.end6 ], [ %s.2, %t ]
102+
br label %o
103+
104+
o:
105+
%.pre4053 = phi i32 [ 0, %k ], [ %.pre4050, %if.end24 ]
106+
%.pre46 = phi i32 [ 0, %k ], [ %.pre42, %if.end24 ]
107+
%4 = phi i32 [ 0, %k ], [ %2, %if.end24 ]
108+
%5 = phi i32 [ 0, %k ], [ %3, %if.end24 ]
109+
%h.6 = phi i32 [ 0, %k ], [ %r.3, %if.end24 ]
110+
%i.7 = phi i32 [ 0, %k ], [ %i.6, %if.end24 ]
111+
%j.5 = phi i32 [ 0, %k ], [ %s.3, %if.end24 ]
112+
%l.7 = phi i32 [ %.pre, %k ], [ %l.6, %if.end24 ]
113+
%r.4 = phi i32 [ 0, %k ], [ %r.3, %if.end24 ]
114+
%s.4 = phi i32 [ %s.0, %k ], [ %s.3, %if.end24 ]
115+
br i1 false, label %t, label %if.end6
116+
}
117+

0 commit comments

Comments
 (0)