Skip to content

Commit a12403c

Browse files
committed
[LV] Do not consider instrs dead if used by phi that's not in plan.
Single-value phis are not modeled in VPlan. If such a phi is only used outside the loop, the current code misses the fact that the phi's incoming value is not dead. Update the code to also look through such phis when checking for outside users. Fixes llvm#54266
1 parent 49cffe3 commit a12403c

File tree

2 files changed

+229
-9
lines changed

2 files changed

+229
-9
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,27 @@ void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) {
359359
}
360360
}
361361

362+
// Check for live-out users currently not modeled in VPlan.
363+
// Note that exit values of inductions are generated independent of
364+
// the recipe. This means VPWidenIntOrFpInductionRecipe &
365+
// VPScalarIVStepsRecipe can be removed, independent of uses outside
366+
// the loop.
367+
// TODO: Remove once live-outs are modeled in VPlan.
368+
static bool hasOutsideUser(Instruction &I, Loop &OrigLoop) {
369+
return any_of(I.users(), [&OrigLoop](User *U) {
370+
if (!OrigLoop.contains(cast<Instruction>(U)))
371+
return true;
372+
373+
// Look through single-value phis in the loop, as they won't be modeled in
374+
// VPlan and may be used outside the loop.
375+
if (auto *PN = dyn_cast<PHINode>(U))
376+
if (PN->getNumIncomingValues() == 1)
377+
return hasOutsideUser(*PN, OrigLoop);
378+
379+
return false;
380+
});
381+
}
382+
362383
void VPlanTransforms::removeDeadRecipes(VPlan &Plan, Loop &OrigLoop) {
363384
VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
364385
// Remove dead recipes in header block. The recipes in the block are processed
@@ -370,15 +391,7 @@ void VPlanTransforms::removeDeadRecipes(VPlan &Plan, Loop &OrigLoop) {
370391
[](VPValue *V) { return V->getNumUsers() > 0; }) ||
371392
(!isa<VPWidenIntOrFpInductionRecipe>(&R) &&
372393
!isa<VPScalarIVStepsRecipe>(&R) && R.getUnderlyingInstr() &&
373-
any_of(R.getUnderlyingInstr()->users(), [&OrigLoop](User *U) {
374-
// Check for live-out users currently not modeled in VPlan.
375-
// Note that exit values of inductions are generated independent of
376-
// the recipe. This means VPWidenIntOrFpInductionRecipe &
377-
// VPScalarIVStepsRecipe can be removed, independent of uses outside
378-
// the loop.
379-
// TODO: Remove once live-outs are modeled in VPlan.
380-
return !OrigLoop.contains(cast<Instruction>(U));
381-
})))
394+
hasOutsideUser(*R.getUnderlyingInstr(), OrigLoop)))
382395
continue;
383396
R.eraseFromParent();
384397
}
Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s
3+
4+
; Tests for PR54266.
5+
; In this function %xor is only used by the single-value phi %phi.xor in
; %loop.latch, and %phi.xor is only used by the phi %xor.lcssa in %exit, whose
; value is returned.
define i32 @one_direct_branch(i32* %src) {
6+
; CHECK-LABEL: @one_direct_branch(
7+
; CHECK-NEXT: entry:
8+
; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
9+
; CHECK: vector.ph:
10+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
11+
; CHECK: vector.body:
12+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
13+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
14+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i32 [[TMP0]]
15+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
16+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
17+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
18+
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i32> <i32 25500, i32 25500, i32 25500, i32 25500>, [[WIDE_LOAD]]
19+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
20+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
21+
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
22+
; CHECK: middle.block:
23+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 0, 0
24+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
25+
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
26+
; CHECK: scalar.ph:
27+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
28+
; CHECK-NEXT: br label [[LOOP:%.*]]
29+
; CHECK: loop:
30+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
31+
; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i32 [[IV]]
32+
; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[SRC_GEP]], align 4
33+
; CHECK-NEXT: [[XOR:%.*]] = xor i32 25500, [[LV]]
34+
; CHECK-NEXT: br label [[LOOP_LATCH]]
35+
; CHECK: loop.latch:
36+
; CHECK-NEXT: [[PHI_XOR:%.*]] = phi i32 [ [[XOR]], [[LOOP]] ]
37+
; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
38+
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 0
39+
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
40+
; CHECK: exit:
41+
; CHECK-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[PHI_XOR]], [[LOOP_LATCH]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
42+
; CHECK-NEXT: ret i32 [[XOR_LCSSA]]
43+
;
44+
entry:
45+
br label %loop
46+
47+
loop:
48+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
49+
%src.gep = getelementptr inbounds i32, i32* %src, i32 %iv
50+
%lv = load i32, i32* %src.gep
51+
%xor = xor i32 25500, %lv
52+
br label %loop.latch
53+
54+
loop.latch:
55+
; Single-value phi: its only incoming value is %xor from %loop.
%phi.xor = phi i32 [ %xor, %loop ]
56+
%iv.next = add nsw i32 %iv, 1
57+
%tobool.not = icmp eq i32 %iv.next, 0
58+
br i1 %tobool.not, label %exit, label %loop
59+
60+
exit:
61+
; The only user of %phi.xor; it is located in the exit block.
%xor.lcssa = phi i32 [ %phi.xor, %loop.latch ]
62+
ret i32 %xor.lcssa
63+
}
64+
65+
; In this function %xor reaches %exit through a chain of two single-value
; phis: %phi.xor.1 in %bb, then %phi.xor in %loop.latch. %phi.xor is only used
; by the phi %xor.lcssa in %exit, whose value is returned.
define i32 @two_direct_branch(i32* %src) {
66+
; CHECK-LABEL: @two_direct_branch(
67+
; CHECK-NEXT: entry:
68+
; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
69+
; CHECK: vector.ph:
70+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
71+
; CHECK: vector.body:
72+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
73+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
74+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i32 [[TMP0]]
75+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
76+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
77+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
78+
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i32> <i32 25500, i32 25500, i32 25500, i32 25500>, [[WIDE_LOAD]]
79+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
80+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
81+
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
82+
; CHECK: middle.block:
83+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 0, 0
84+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
85+
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
86+
; CHECK: scalar.ph:
87+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
88+
; CHECK-NEXT: br label [[LOOP:%.*]]
89+
; CHECK: loop:
90+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
91+
; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i32 [[IV]]
92+
; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[SRC_GEP]], align 4
93+
; CHECK-NEXT: [[XOR:%.*]] = xor i32 25500, [[LV]]
94+
; CHECK-NEXT: br label [[BB:%.*]]
95+
; CHECK: bb:
96+
; CHECK-NEXT: [[PHI_XOR_1:%.*]] = phi i32 [ [[XOR]], [[LOOP]] ]
97+
; CHECK-NEXT: br label [[LOOP_LATCH]]
98+
; CHECK: loop.latch:
99+
; CHECK-NEXT: [[PHI_XOR:%.*]] = phi i32 [ [[PHI_XOR_1]], [[BB]] ]
100+
; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
101+
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 0
102+
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
103+
; CHECK: exit:
104+
; CHECK-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[PHI_XOR]], [[LOOP_LATCH]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
105+
; CHECK-NEXT: ret i32 [[XOR_LCSSA]]
106+
;
107+
entry:
108+
br label %loop
109+
110+
loop: ; preds = %loop.latch, %entry
111+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
112+
%src.gep = getelementptr inbounds i32, i32* %src, i32 %iv
113+
%lv = load i32, i32* %src.gep
114+
%xor = xor i32 25500, %lv
115+
br label %bb
116+
117+
bb:
118+
; First single-value phi in the chain: its only incoming value is %xor.
%phi.xor.1 = phi i32 [ %xor, %loop ]
119+
br label %loop.latch
120+
121+
loop.latch:
122+
; Second single-value phi in the chain: its only incoming value is %phi.xor.1.
%phi.xor = phi i32 [ %phi.xor.1, %bb ]
123+
%iv.next = add nsw i32 %iv, 1
124+
%tobool.not = icmp eq i32 %iv.next, 0
125+
br i1 %tobool.not, label %exit, label %loop
126+
127+
exit:
128+
; The only user of %phi.xor; it is located in the exit block.
%xor.lcssa = phi i32 [ %phi.xor, %loop.latch ]
129+
ret i32 %xor.lcssa
130+
}
131+
132+
; In this function the phi %phi.xor in %loop.latch has two incoming values
; (%xor from %loop and the constant 10 from %then), selected by the conditional
; branch on %cmp. %phi.xor is only used by the phi %xor.lcssa in %exit, whose
; value is returned.
define i32 @cond_branch(i32 %a, i32* %src) {
133+
; CHECK-LABEL: @cond_branch(
134+
; CHECK-NEXT: entry:
135+
; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
136+
; CHECK: vector.ph:
137+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
138+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
139+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
140+
; CHECK: vector.body:
141+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
142+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
143+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
144+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
145+
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
146+
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
147+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i32 [[TMP0]]
148+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
149+
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
150+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
151+
; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> <i32 25500, i32 25500, i32 25500, i32 25500>, [[WIDE_LOAD]]
152+
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
153+
; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], <i1 true, i1 true, i1 true, i1 true>
154+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> <i32 10, i32 10, i32 10, i32 10>, <4 x i32> [[TMP7]]
155+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
156+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
157+
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
158+
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
159+
; CHECK: middle.block:
160+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 0, 0
161+
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[PREDPHI]], i32 3
162+
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
163+
; CHECK: scalar.ph:
164+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
165+
; CHECK-NEXT: br label [[LOOP:%.*]]
166+
; CHECK: loop:
167+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
168+
; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i32 [[IV]]
169+
; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[SRC_GEP]], align 4
170+
; CHECK-NEXT: [[XOR:%.*]] = xor i32 25500, [[LV]]
171+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[IV]], [[A]]
172+
; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_LATCH]], label [[THEN:%.*]]
173+
; CHECK: then:
174+
; CHECK-NEXT: br label [[LOOP_LATCH]]
175+
; CHECK: loop.latch:
176+
; CHECK-NEXT: [[PHI_XOR:%.*]] = phi i32 [ [[XOR]], [[LOOP]] ], [ 10, [[THEN]] ]
177+
; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1
178+
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 0
179+
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
180+
; CHECK: exit:
181+
; CHECK-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[PHI_XOR]], [[LOOP_LATCH]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
182+
; CHECK-NEXT: ret i32 [[XOR_LCSSA]]
183+
;
184+
entry:
185+
br label %loop
186+
187+
loop:
188+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
189+
%src.gep = getelementptr inbounds i32, i32* %src, i32 %iv
190+
%lv = load i32, i32* %src.gep
191+
%xor = xor i32 25500, %lv
192+
%cmp = icmp ne i32 %iv, %a
193+
; Goes straight to the latch when %iv != %a, otherwise through %then.
br i1 %cmp, label %loop.latch, label %then
194+
195+
then:
196+
br label %loop.latch
197+
198+
loop.latch:
199+
; Two-entry phi: %xor when coming from %loop, 10 when coming from %then.
%phi.xor = phi i32 [ %xor, %loop ], [ 10, %then ]
200+
%iv.next = add nsw i32 %iv, 1
201+
%tobool.not = icmp eq i32 %iv.next, 0
202+
br i1 %tobool.not, label %exit, label %loop
203+
204+
exit:
205+
; The only user of %phi.xor; it is located in the exit block.
%xor.lcssa = phi i32 [ %phi.xor, %loop.latch ]
206+
ret i32 %xor.lcssa
207+
}

0 commit comments

Comments
 (0)