Skip to content

Commit 369b5ad

Browse files
committed
[LV] Check isPredInst instead of isScalarWithPred in uniform analysis.
Any instruction marked as uniform will result in a uniform VPReplicateRecipe. If it requires predication, it will be placed in a replicate region, even if isScalarWithPredication returns false. Check isPredicatedInst instead of isScalarWithPredication to avoid generating uniform VPReplicateRecipes placed inside a replicate region. This fixes an assertion when using scalable VFs. Fixes #80416. Fixes #94328.
1 parent 967eba0 commit 369b5ad

File tree

2 files changed

+97
-4
lines changed

2 files changed

+97
-4
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3907,7 +3907,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
39073907
SetVector<Instruction *> Worklist;
39083908

39093909
// Add uniform instructions demanding lane 0 to the worklist. Instructions
3910-
// that are scalar with predication must not be considered uniform after
3910+
// that require predication must not be considered uniform after
39113911
// vectorization, because that would create an erroneous replicating region
39123912
// where only a single instance out of VF should be formed.
39133913
// TODO: optimize such seldom cases if found important, see PR40816.
@@ -3917,9 +3917,10 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
39173917
<< *I << "\n");
39183918
return;
39193919
}
3920-
if (isScalarWithPredication(I, VF)) {
3921-
LLVM_DEBUG(dbgs() << "LV: Found not uniform being ScalarWithPredication: "
3922-
<< *I << "\n");
3920+
if (isPredicatedInst(I)) {
3921+
LLVM_DEBUG(
3922+
dbgs() << "LV: Found not uniform due to requiring predication: " << *I
3923+
<< "\n");
39233924
return;
39243925
}
39253926
LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *I << "\n");
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
; RUN: opt -p loop-vectorize -mtriple aarch64 -mcpu=neoverse-v1 -S %s | FileCheck %s
2+
3+
; Test case for https://github.com/llvm/llvm-project/issues/94328.
4+
define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
5+
entry:
6+
%conv61 = zext i32 %x to i64
7+
br label %loop
8+
9+
loop:
10+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
11+
%div18 = sdiv i64 %M, %conv6
12+
%conv20 = trunc i64 %div18 to i32
13+
%mul30 = mul i64 %div18, %conv61
14+
%sub31 = sub i64 %iv, %mul30
15+
%conv34 = trunc i64 %sub31 to i32
16+
%mul35 = mul i32 %x, %conv20
17+
%add36 = add i32 %mul35, %conv34
18+
%idxprom = sext i32 %add36 to i64
19+
%gep = getelementptr double, ptr %dst, i64 %idxprom
20+
store double 0.000000e+00, ptr %gep, align 8
21+
%iv.next = add i64 %iv, 1
22+
%ec = icmp eq i64 %iv.next, %N
23+
br i1 %ec, label %exit, label %loop
24+
25+
exit:
26+
ret void
27+
}
28+
29+
define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
30+
entry:
31+
%conv61 = zext i32 %x to i64
32+
br label %loop
33+
34+
loop:
35+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
36+
%c = icmp ule i64 %iv, %M
37+
br i1 %c, label %then, label %loop.latch
38+
39+
then:
40+
%div18 = sdiv i64 %M, %conv6
41+
%conv20 = trunc i64 %div18 to i32
42+
%mul30 = mul i64 %div18, %conv61
43+
%sub31 = sub i64 %iv, %mul30
44+
%conv34 = trunc i64 %sub31 to i32
45+
%mul35 = mul i32 %x, %conv20
46+
%add36 = add i32 %mul35, %conv34
47+
%idxprom = sext i32 %add36 to i64
48+
%gep = getelementptr double, ptr %dst, i64 %idxprom
49+
store double 0.000000e+00, ptr %gep, align 8
50+
br label %loop.latch
51+
52+
loop.latch:
53+
%iv.next = add i64 %iv, 1
54+
%ec = icmp eq i64 %iv.next, %N
55+
br i1 %ec, label %exit, label %loop
56+
57+
exit:
58+
ret void
59+
}
60+
61+
; Test case for https://github.com/llvm/llvm-project/issues/80416.
62+
define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
63+
entry:
64+
%mul.1.i = mul i64 %x, %x
65+
%mul.2.i = mul i64 %mul.1.i, %x
66+
br label %loop
67+
68+
loop:
69+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
70+
%div.i = udiv i64 %iv, %mul.2.i
71+
%rem.i = urem i64 %iv, %mul.2.i
72+
%div.1.i = udiv i64 %rem.i, %mul.1.i
73+
%rem.1.i = urem i64 %rem.i, %mul.1.i
74+
%div.2.i = udiv i64 %rem.1.i, %x
75+
%rem.2.i = urem i64 %rem.1.i, %x
76+
%mul.i = mul i64 %x, %div.i
77+
%add.i = add i64 %mul.i, %div.1.i
78+
%mul.1.i9 = mul i64 %add.i, %x
79+
%add.1.i = add i64 %mul.1.i9, %div.2.i
80+
%mul.2.i11 = mul i64 %add.1.i, %x
81+
%add.2.i = add i64 %mul.2.i11, %rem.2.i
82+
%sext.i = shl i64 %add.2.i, 32
83+
%conv6.i = ashr i64 %sext.i, 32
84+
%gep = getelementptr i64, ptr %dst, i64 %conv6.i
85+
store i64 %div.i, ptr %gep, align 4
86+
%iv.next = add i64 %iv, 1
87+
%exitcond.not = icmp eq i64 %iv, %N
88+
br i1 %exitcond.not, label %exit, label %loop
89+
90+
exit:
91+
ret void
92+
}

0 commit comments

Comments
 (0)