Skip to content

Commit c3a5eb3

Browse files
committed
[LV] Amend check for IV increments in collectUsersInExitBlock
The check for IV increments in collectUsersInExitBlock currently also triggers for exit-block PHIs that use the IV start value, so we fail to add the incoming value for the middle block to these PHIs. Fix this by restricting the IV-increment check to incoming values that are instructions inside the loop.
1 parent 4687017 commit c3a5eb3

File tree

2 files changed

+85
-0
lines changed

2 files changed

+85
-0
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8662,6 +8662,7 @@ static MapVector<PHINode *, VPValue *> collectUsersInExitBlock(
86628662
!cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
86638663
isa<VPWidenPointerInductionRecipe>(V) ||
86648664
(isa<Instruction>(IncomingValue) &&
8665+
OrigLoop->contains(cast<Instruction>(IncomingValue)) &&
86658666
any_of(IncomingValue->users(), [&Inductions](User *U) {
86668667
auto *P = dyn_cast<PHINode>(U);
86678668
return P && Inductions.contains(P);
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s
3+
4+
; Check that an exit-block PHI whose incoming value is the IV start value
5+
; (%start2) also gets an incoming value for the middle block, and that we do not crash.
6+
7+
; @foo: a single-block loop whose induction variable %ind starts at %start2
; and steps by 1 until it equals %end. The exit block returns %start2 through
; a PHI, i.e. the exit PHI uses the induction start value rather than a value
; computed inside the loop.
define i64 @foo(i64 %start, i64 %end) {
8+
; CHECK-LABEL: define i64 @foo(
9+
; CHECK-SAME: i64 [[START:%.*]], i64 [[END:%.*]]) {
10+
; CHECK-NEXT: [[ENTRY:.*]]:
11+
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i32], align 4
12+
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i32], align 4
13+
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
14+
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
15+
; CHECK-NEXT: [[START2:%.*]] = and i64 [[START]], 12345
16+
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[END]], [[START2]]
17+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
18+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
19+
; CHECK: [[VECTOR_PH]]:
20+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
21+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
22+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START2]], [[N_VEC]]
23+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
24+
; CHECK: [[VECTOR_BODY]]:
25+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
26+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START2]], [[INDEX]]
27+
; CHECK-NEXT: [[IND:%.*]] = add i64 [[OFFSET_IDX]], 0
28+
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[IND]]
29+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX1]], i32 0
30+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[IND]]
31+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
32+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
33+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
34+
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
35+
; CHECK: [[MIDDLE_BLOCK]]:
36+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
37+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
38+
; CHECK: [[SCALAR_PH]]:
39+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START2]], %[[ENTRY]] ]
40+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
41+
; CHECK: [[FOR_BODY]]:
42+
; CHECK-NEXT: [[IND1:%.*]] = phi i64 [ [[IND_NEXT1:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
43+
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[IND1]]
44+
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
45+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[IND1]]
46+
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
47+
; CHECK-NEXT: [[IND_NEXT1]] = add i64 [[IND1]], 1
48+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[IND_NEXT1]], [[END]]
49+
; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
50+
; CHECK: [[EXIT]]:
51+
; CHECK-NEXT: [[USE:%.*]] = phi i64 [ [[START2]], %[[FOR_BODY]] ], [ [[START2]], %[[MIDDLE_BLOCK]] ]
52+
; CHECK-NEXT: ret i64 [[USE]]
53+
;
54+
entry:
55+
%p1 = alloca [1024 x i32]
56+
%p2 = alloca [1024 x i32]
57+
call void @init_mem(ptr %p1, i64 1024)
58+
call void @init_mem(ptr %p2, i64 1024)
59+
; %start2 is computed in the entry block, outside the loop; it is the start
; value of the induction PHI below and the value returned from the exit block.
%start2 = and i64 %start, 12345
60+
br label %for.body
61+
62+
for.body:
63+
; Induction PHI: %start2 on entry, %ind.next on the back edge.
%ind = phi i64 [ %ind.next, %for.body ], [ %start2, %entry ]
64+
%arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %ind
65+
; The results of the two loads (%0 and %1) are not used anywhere in @foo.
%0 = load i32, ptr %arrayidx1, align 4
66+
%arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %ind
67+
%1 = load i32, ptr %arrayidx2, align 4
68+
; Induction increment; the loop exits once %ind.next equals %end.
%ind.next = add i64 %ind, 1
69+
%cmp = icmp ne i64 %ind.next, %end
70+
br i1 %cmp, label %for.body, label %exit
71+
72+
exit:
73+
; Exit-block PHI whose only incoming value is the induction start value
; %start2. The CHECK line for [[USE]] above expects this PHI to gain a second
; incoming [[START2]] from the middle block after vectorization.
%use = phi i64 [ %start2, %for.body ]
74+
ret i64 %use
75+
}
76+
77+
declare void @init_mem(ptr, i64)
78+
79+
;.
80+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
81+
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
82+
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
83+
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
84+
;.

0 commit comments

Comments
 (0)