Skip to content

Commit 9c86198

Browse files
committed
[SLP] Update vector value for incoming phi node, being vectorized already
If the phi node contains the same incoming block/value multiple times, the vectorized value of the corresponding operand entry needs to be updated: that entry is not going to be vectorized itself, because the incoming value has already been vectorized. Fixes llvm#131355
1 parent 955c02d commit 9c86198

File tree

2 files changed

+200
-4
lines changed

2 files changed

+200
-4
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3084,6 +3084,10 @@ class BoUpSLP {
30843084

30853085
/// \returns the graph entry for the \p Idx operand of the \p E entry.
30863086
const TreeEntry *getOperandEntry(const TreeEntry *E, unsigned Idx) const;
3087+
/// Non-const overload of getOperandEntry: forwards to the const overload for
/// the \p Idx operand of the \p E entry and casts the constness away from the
/// returned entry, so callers holding a mutable entry get a mutable result.
TreeEntry *getOperandEntry(TreeEntry *E, unsigned Idx) {
3088+
return const_cast<TreeEntry *>(
3089+
getOperandEntry(const_cast<const TreeEntry *>(E), Idx));
3090+
}
30873091

30883092
/// Gets the root instruction for the given node. If the node is a strided
30893093
/// load/store node with the reverse order, the root instruction is the last
@@ -10759,7 +10763,7 @@ void BoUpSLP::transformNodes() {
1075910763
break;
1076010764
// This node is a minmax node.
1076110765
E.CombinedOp = TreeEntry::MinMax;
10762-
TreeEntry *CondEntry = const_cast<TreeEntry *>(getOperandEntry(&E, 0));
10766+
TreeEntry *CondEntry = getOperandEntry(&E, 0);
1076310767
if (SelectOnly && CondEntry->UserTreeIndex &&
1076410768
CondEntry->State == TreeEntry::Vectorize) {
1076510769
// The condition node is part of the combined minmax node.
@@ -16376,7 +16380,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
1637616380
// visit every block once.
1637716381
SmallPtrSet<BasicBlock *, 4> VisitedBBs;
1637816382

16379-
for (unsigned I : seq<unsigned>(0, PH->getNumIncomingValues())) {
16383+
for (unsigned I : seq<unsigned>(PH->getNumIncomingValues())) {
1638016384
ValueList Operands;
1638116385
BasicBlock *IBB = PH->getIncomingBlock(I);
1638216386

@@ -16387,7 +16391,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
1638716391
}
1638816392

1638916393
if (!VisitedBBs.insert(IBB).second) {
16390-
NewPhi->addIncoming(NewPhi->getIncomingValueForBlock(IBB), IBB);
16394+
Value *VecOp = NewPhi->getIncomingValueForBlock(IBB);
16395+
NewPhi->addIncoming(VecOp, IBB);
16396+
TreeEntry *OpTE = getOperandEntry(E, I);
16397+
OpTE->VectorizedValue = VecOp;
1639116398
continue;
1639216399
}
1639316400

@@ -18965,7 +18972,7 @@ bool BoUpSLP::collectValuesToDemote(
1896518972
const unsigned NumOps = E.getNumOperands();
1896618973
SmallVector<const TreeEntry *> Ops(NumOps);
1896718974
transform(seq<unsigned>(0, NumOps), Ops.begin(),
18968-
std::bind(&BoUpSLP::getOperandEntry, this, &E, _1));
18975+
[&](unsigned Idx) { return getOperandEntry(&E, Idx); });
1896918976

1897018977
return TryProcessInstruction(BitWidth, Ops);
1897118978
}
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver2 < %s | FileCheck %s
3+
4+
define void @foo(ptr %arg) {
5+
; CHECK-LABEL: define void @foo(
6+
; CHECK-SAME: ptr [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[BB:.*]]:
8+
; CHECK-NEXT: [[CALL:%.*]] = call noalias ptr null(i32 9, i64 16, i64 816, i64 400, i64 0)
9+
; CHECK-NEXT: [[ICMP:%.*]] = icmp eq ptr [[CALL]], null
10+
; CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, splat (i16 16)
11+
; CHECK-NEXT: [[AND:%.*]] = and <16 x i64> zeroinitializer, splat (i64 1)
12+
; CHECK-NEXT: [[ADD1:%.*]] = add nuw nsw i64 0, 48
13+
; CHECK-NEXT: [[SELECT:%.*]] = select <16 x i1> zeroinitializer, <16 x i16> zeroinitializer, <16 x i16> zeroinitializer
14+
; CHECK-NEXT: [[ADD2:%.*]] = add nsw <16 x i16> [[SELECT]], splat (i16 -6124)
15+
; CHECK-NEXT: [[ADD3:%.*]] = add <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, splat (i16 64)
16+
; CHECK-NEXT: [[SUB:%.*]] = sub nsw <16 x i16> zeroinitializer, [[ADD3]]
17+
; CHECK-NEXT: [[SELECT4:%.*]] = select <16 x i1> zeroinitializer, <16 x i16> [[ADD3]], <16 x i16> [[SUB]]
18+
; CHECK-NEXT: [[ADD5:%.*]] = add <16 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, splat (i64 80)
19+
; CHECK-NEXT: [[FADD:%.*]] = fadd float 0.000000e+00, -1.580000e+02
20+
; CHECK-NEXT: store float 0.000000e+00, ptr null, align 4
21+
; CHECK-NEXT: [[ICMP6:%.*]] = icmp eq i32 0, 0
22+
; CHECK-NEXT: call void null()
23+
; CHECK-NEXT: [[MUL:%.*]] = mul i32 0, 9
24+
; CHECK-NEXT: br label %[[BB7:.*]]
25+
; CHECK: [[BB7]]:
26+
; CHECK-NEXT: [[PHI:%.*]] = phi float [ 4.000000e+00, %[[BB]] ], [ 0.000000e+00, %[[BB27:.*]] ]
27+
; CHECK-NEXT: [[FADD8:%.*]] = fadd float 0.000000e+00, 0.000000e+00
28+
; CHECK-NEXT: [[FADD9:%.*]] = fadd float [[PHI]], 1.000000e+00
29+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[FADD9]], i32 0
30+
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> <float 1.000000e+00, float 0.000000e+00>, [[TMP0]]
31+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
32+
; CHECK-NEXT: [[FREM:%.*]] = frem float [[TMP2]], 7.000000e+00
33+
; CHECK-NEXT: [[CALL12:%.*]] = call i32 @llvm.x86.sse.cvttss2si(<4 x float> zeroinitializer)
34+
; CHECK-NEXT: switch i32 [[CALL12]], label %[[BB13:.*]] [
35+
; CHECK-NEXT: i32 125, label %[[BB30:.*]]
36+
; CHECK-NEXT: i32 98, label %[[BB30]]
37+
; CHECK-NEXT: i32 99, label %[[BB30]]
38+
; CHECK-NEXT: i32 103, label %[[BB30]]
39+
; CHECK-NEXT: i32 112, label %[[BB30]]
40+
; CHECK-NEXT: i32 116, label %[[BB30]]
41+
; CHECK-NEXT: i32 121, label %[[BB30]]
42+
; CHECK-NEXT: ]
43+
; CHECK: [[BB13]]:
44+
; CHECK-NEXT: [[FMUL:%.*]] = fmul double 0.000000e+00, 1.220000e+02
45+
; CHECK-NEXT: [[MUL14:%.*]] = mul i32 0, -3
46+
; CHECK-NEXT: [[MUL15:%.*]] = mul i32 [[MUL14]], -3
47+
; CHECK-NEXT: [[CALL16:%.*]] = call i32 @llvm.x86.sse.cvttss2si(<4 x float> zeroinitializer)
48+
; CHECK-NEXT: switch i32 [[CALL16]], label %[[BB17:.*]] [
49+
; CHECK-NEXT: i32 125, label %[[BB30]]
50+
; CHECK-NEXT: i32 98, label %[[BB30]]
51+
; CHECK-NEXT: i32 99, label %[[BB30]]
52+
; CHECK-NEXT: i32 103, label %[[BB30]]
53+
; CHECK-NEXT: i32 112, label %[[BB30]]
54+
; CHECK-NEXT: i32 116, label %[[BB30]]
55+
; CHECK-NEXT: i32 121, label %[[BB30]]
56+
; CHECK-NEXT: ]
57+
; CHECK: [[BB17]]:
58+
; CHECK-NEXT: [[FADD18:%.*]] = fadd float 0.000000e+00, 1.000000e+00
59+
; CHECK-NEXT: [[MUL19:%.*]] = mul i32 [[MUL15]], -3
60+
; CHECK-NEXT: br label %[[BB20:.*]]
61+
; CHECK: [[BB20]]:
62+
; CHECK-NEXT: [[FADD21:%.*]] = fadd float [[FADD18]], 1.000000e+00
63+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[FADD21]], i32 0
64+
; CHECK-NEXT: switch i32 0, label %[[BB22:.*]] [
65+
; CHECK-NEXT: i32 125, label %[[BB30]]
66+
; CHECK-NEXT: i32 98, label %[[BB30]]
67+
; CHECK-NEXT: i32 99, label %[[BB30]]
68+
; CHECK-NEXT: i32 103, label %[[BB30]]
69+
; CHECK-NEXT: i32 112, label %[[BB30]]
70+
; CHECK-NEXT: i32 116, label %[[BB30]]
71+
; CHECK-NEXT: i32 121, label %[[BB30]]
72+
; CHECK-NEXT: ]
73+
; CHECK: [[BB22]]:
74+
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x float> <float 1.000000e+00, float 0.000000e+00>, [[TMP3]]
75+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
76+
; CHECK-NEXT: [[FREM25:%.*]] = frem float [[TMP5]], 7.000000e+00
77+
; CHECK-NEXT: [[FMUL26:%.*]] = fmul float [[FREM25]], 5.000000e+00
78+
; CHECK-NEXT: switch i32 0, label %[[BB27]] [
79+
; CHECK-NEXT: i32 125, label %[[BB30]]
80+
; CHECK-NEXT: i32 98, label %[[BB30]]
81+
; CHECK-NEXT: i32 99, label %[[BB30]]
82+
; CHECK-NEXT: i32 103, label %[[BB30]]
83+
; CHECK-NEXT: i32 112, label %[[BB30]]
84+
; CHECK-NEXT: i32 116, label %[[BB30]]
85+
; CHECK-NEXT: i32 121, label %[[BB30]]
86+
; CHECK-NEXT: ]
87+
; CHECK: [[BB27]]:
88+
; CHECK-NEXT: [[FADD28:%.*]] = fadd float [[TMP5]], 1.000000e+00
89+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
90+
; CHECK-NEXT: [[FADD29:%.*]] = fadd float [[TMP6]], 0.000000e+00
91+
; CHECK-NEXT: br label %[[BB7]]
92+
; CHECK: [[BB30]]:
93+
; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x float> [ [[TMP1]], %[[BB7]] ], [ [[TMP1]], %[[BB7]] ], [ [[TMP1]], %[[BB7]] ], [ [[TMP1]], %[[BB7]] ], [ [[TMP1]], %[[BB7]] ], [ [[TMP1]], %[[BB7]] ], [ [[TMP1]], %[[BB7]] ], [ zeroinitializer, %[[BB13]] ], [ zeroinitializer, %[[BB13]] ], [ zeroinitializer, %[[BB13]] ], [ zeroinitializer, %[[BB13]] ], [ zeroinitializer, %[[BB13]] ], [ zeroinitializer, %[[BB13]] ], [ zeroinitializer, %[[BB13]] ], [ [[TMP3]], %[[BB20]] ], [ [[TMP3]], %[[BB20]] ], [ [[TMP3]], %[[BB20]] ], [ [[TMP3]], %[[BB20]] ], [ [[TMP3]], %[[BB20]] ], [ [[TMP3]], %[[BB20]] ], [ [[TMP3]], %[[BB20]] ], [ [[TMP4]], %[[BB22]] ], [ [[TMP4]], %[[BB22]] ], [ [[TMP4]], %[[BB22]] ], [ [[TMP4]], %[[BB22]] ], [ [[TMP4]], %[[BB22]] ], [ [[TMP4]], %[[BB22]] ], [ [[TMP4]], %[[BB22]] ]
94+
; CHECK-NEXT: ret void
95+
;
96+
bb:
97+
%call = call noalias ptr null(i32 9, i64 16, i64 816, i64 400, i64 0)
98+
%icmp = icmp eq ptr %call, null
99+
%add = add <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, splat (i16 16)
100+
%and = and <16 x i64> zeroinitializer, splat (i64 1)
101+
%add1 = add nuw nsw i64 0, 48
102+
%select = select <16 x i1> zeroinitializer, <16 x i16> zeroinitializer, <16 x i16> zeroinitializer
103+
%add2 = add nsw <16 x i16> %select, splat (i16 -6124)
104+
%add3 = add <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, splat (i16 64)
105+
%sub = sub nsw <16 x i16> zeroinitializer, %add3
106+
%select4 = select <16 x i1> zeroinitializer, <16 x i16> %add3, <16 x i16> %sub
107+
%add5 = add <16 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, splat (i64 80)
108+
%fadd = fadd float 0.000000e+00, -1.580000e+02
109+
store float 0.000000e+00, ptr null, align 4
110+
%icmp6 = icmp eq i32 0, 0
111+
call void null()
112+
%mul = mul i32 0, 9
113+
br label %bb7
114+
115+
bb7:
116+
%phi = phi float [ 4.000000e+00, %bb ], [ 0.000000e+00, %bb27 ]
117+
%fadd8 = fadd float 0.000000e+00, 0.000000e+00
118+
%fadd9 = fadd float %phi, 1.000000e+00
119+
%fadd10 = fadd float %fadd9, 1.000000e+00
120+
%fadd11 = fadd float 0.000000e+00, 0.000000e+00
121+
%frem = frem float %fadd10, 7.000000e+00
122+
%call12 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> zeroinitializer)
123+
switch i32 %call12, label %bb13 [
124+
i32 125, label %bb30
125+
i32 98, label %bb30
126+
i32 99, label %bb30
127+
i32 103, label %bb30
128+
i32 112, label %bb30
129+
i32 116, label %bb30
130+
i32 121, label %bb30
131+
]
132+
133+
bb13:
134+
%fmul = fmul double 0.000000e+00, 1.220000e+02
135+
%mul14 = mul i32 0, -3
136+
%mul15 = mul i32 %mul14, -3
137+
%call16 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> zeroinitializer)
138+
switch i32 %call16, label %bb17 [
139+
i32 125, label %bb30
140+
i32 98, label %bb30
141+
i32 99, label %bb30
142+
i32 103, label %bb30
143+
i32 112, label %bb30
144+
i32 116, label %bb30
145+
i32 121, label %bb30
146+
]
147+
148+
bb17:
149+
%fadd18 = fadd float 0.000000e+00, 1.000000e+00
150+
%mul19 = mul i32 %mul15, -3
151+
br label %bb20
152+
153+
bb20:
154+
%fadd21 = fadd float %fadd18, 1.000000e+00
155+
switch i32 0, label %bb22 [
156+
i32 125, label %bb30
157+
i32 98, label %bb30
158+
i32 99, label %bb30
159+
i32 103, label %bb30
160+
i32 112, label %bb30
161+
i32 116, label %bb30
162+
i32 121, label %bb30
163+
]
164+
165+
bb22:
166+
%fadd23 = fadd float %fadd21, 1.000000e+00
167+
%fadd24 = fadd float 0.000000e+00, 0.000000e+00
168+
%frem25 = frem float %fadd23, 7.000000e+00
169+
%fmul26 = fmul float %frem25, 5.000000e+00
170+
switch i32 0, label %bb27 [
171+
i32 125, label %bb30
172+
i32 98, label %bb30
173+
i32 99, label %bb30
174+
i32 103, label %bb30
175+
i32 112, label %bb30
176+
i32 116, label %bb30
177+
i32 121, label %bb30
178+
]
179+
180+
bb27:
181+
%fadd28 = fadd float %fadd23, 1.000000e+00
182+
%fadd29 = fadd float %fadd24, 0.000000e+00
183+
br label %bb7
184+
185+
bb30:
186+
%phi31 = phi float [ %fadd10, %bb7 ], [ %fadd10, %bb7 ], [ %fadd10, %bb7 ], [ %fadd10, %bb7 ], [ %fadd10, %bb7 ], [ %fadd10, %bb7 ], [ %fadd10, %bb7 ], [ 0.000000e+00, %bb13 ], [ 0.000000e+00, %bb13 ], [ 0.000000e+00, %bb13 ], [ 0.000000e+00, %bb13 ], [ 0.000000e+00, %bb13 ], [ 0.000000e+00, %bb13 ], [ 0.000000e+00, %bb13 ], [ %fadd21, %bb20 ], [ %fadd21, %bb20 ], [ %fadd21, %bb20 ], [ %fadd21, %bb20 ], [ %fadd21, %bb20 ], [ %fadd21, %bb20 ], [ %fadd21, %bb20 ], [ %fadd23, %bb22 ], [ %fadd23, %bb22 ], [ %fadd23, %bb22 ], [ %fadd23, %bb22 ], [ %fadd23, %bb22 ], [ %fadd23, %bb22 ], [ %fadd23, %bb22 ]
187+
%phi32 = phi float [ %fadd11, %bb7 ], [ %fadd11, %bb7 ], [ %fadd11, %bb7 ], [ %fadd11, %bb7 ], [ %fadd11, %bb7 ], [ %fadd11, %bb7 ], [ %fadd11, %bb7 ], [ 0.000000e+00, %bb13 ], [ 0.000000e+00, %bb13 ], [ 0.000000e+00, %bb13 ], [ 0.000000e+00, %bb13 ], [ 0.000000e+00, %bb13 ], [ 0.000000e+00, %bb13 ], [ 0.000000e+00, %bb13 ], [ 0.000000e+00, %bb20 ], [ 0.000000e+00, %bb20 ], [ 0.000000e+00, %bb20 ], [ 0.000000e+00, %bb20 ], [ 0.000000e+00, %bb20 ], [ 0.000000e+00, %bb20 ], [ 0.000000e+00, %bb20 ], [ %fadd24, %bb22 ], [ %fadd24, %bb22 ], [ %fadd24, %bb22 ], [ %fadd24, %bb22 ], [ %fadd24, %bb22 ], [ %fadd24, %bb22 ], [ %fadd24, %bb22 ]
188+
ret void
189+
}

0 commit comments

Comments
 (0)