Skip to content

Commit fe8a27a

Browse files
fhahn authored and tstellar committed
[LV] Handle zero cost loops in selectInterleaveCount.
In some cases, like in the added test case, we can reach selectInterleaveCount with loops that actually have a cost of 0. Unfortunately, a loop cost of 0 is also used to communicate that the cost has not been computed yet. To resolve the crash, bail out if the cost remains zero after computing it. This seems like the best option, as there are multiple code paths that return a cost of 0 to force a computation in selectInterleaveCount. Computing the cost up front in each of those places would unnecessarily complicate the logic. Fixes llvm#54413. (cherry picked from commit ecb4171)
1 parent 2c4d288 commit fe8a27a

File tree

2 files changed

+62
-10
lines changed

2 files changed

+62
-10
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -6035,6 +6035,18 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
60356035
!(InterleaveSmallLoopScalarReduction && HasReductions && VF.isScalar()))
60366036
return 1;
60376037

6038+
// If we did not calculate the cost for VF (because the user selected the VF)
6039+
// then we calculate the cost of VF here.
6040+
if (LoopCost == 0) {
6041+
InstructionCost C = expectedCost(VF).first;
6042+
assert(C.isValid() && "Expected to have chosen a VF with valid cost");
6043+
LoopCost = *C.getValue();
6044+
6045+
// Loop body is free and there is no need for interleaving.
6046+
if (LoopCost == 0)
6047+
return 1;
6048+
}
6049+
60386050
RegisterUsage R = calculateRegisterUsage({VF})[0];
60396051
// We divide by these constants so assume that we have at least one
60406052
// instruction that uses at least one register.
@@ -6126,16 +6138,6 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
61266138

61276139
assert(IC > 0 && "Interleave count must be greater than 0.");
61286140

6129-
// If we did not calculate the cost for VF (because the user selected the VF)
6130-
// then we calculate the cost of VF here.
6131-
if (LoopCost == 0) {
6132-
InstructionCost C = expectedCost(VF).first;
6133-
assert(C.isValid() && "Expected to have chosen a VF with valid cost");
6134-
LoopCost = *C.getValue();
6135-
}
6136-
6137-
assert(LoopCost && "Non-zero loop cost expected");
6138-
61396141
// Interleave if we vectorized this loop and there is a reduction that could
61406142
// benefit from interleaving.
61416143
if (VF.isVector() && HasReductions) {
Lines changed: 50 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,50 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -passes='loop(indvars),loop-vectorize' -S %s | FileCheck %s
3+
4+
target triple = "x86_64-unknown-linux-gnu"
5+
6+
; After indvars, the backedge taken count for %loop2 becomes 1, but SCEV
7+
; retains the cached original BTC, as the loop is in dead code. Make sure
8+
; LV does not crash when trying to select an interleave count for a loop with zero cost.
9+
define void @pr54413(i64* %ptr.base) {
10+
; CHECK-LABEL: @pr54413(
11+
; CHECK-NEXT: entry:
12+
; CHECK-NEXT: br label [[LOOP1:%.*]]
13+
; CHECK: loop1:
14+
; CHECK-NEXT: br i1 true, label [[LOOP1_LATCH:%.*]], label [[LOOP2_PREHEADER:%.*]]
15+
; CHECK: loop2.preheader:
16+
; CHECK-NEXT: br label [[LOOP2:%.*]]
17+
; CHECK: loop2:
18+
; CHECK-NEXT: [[PTR:%.*]] = phi i64* [ [[PTR_BASE:%.*]], [[LOOP2_PREHEADER]] ], [ [[PTR_NEXT:%.*]], [[LOOP2]] ]
19+
; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
20+
; CHECK-NEXT: br i1 true, label [[LOOP2_EXIT:%.*]], label [[LOOP2]]
21+
; CHECK: loop2.exit:
22+
; CHECK-NEXT: [[PTR_NEXT_LCSSA:%.*]] = phi i64* [ [[PTR_NEXT]], [[LOOP2]] ]
23+
; CHECK-NEXT: br label [[LOOP1_LATCH]]
24+
; CHECK: loop1.latch:
25+
; CHECK-NEXT: br label [[LOOP1]]
26+
;
27+
entry:
28+
br label %loop1
29+
30+
loop1:
31+
br i1 true, label %loop1.latch, label %loop2.preheader
32+
33+
loop2.preheader:
34+
br label %loop2
35+
36+
loop2:
37+
%iv = phi i64 [ 0, %loop2.preheader ], [ %iv.next, %loop2 ]
38+
%ptr = phi i64* [ %ptr.base, %loop2.preheader ], [ %ptr.next, %loop2 ]
39+
%iv.next = add nuw nsw i64 %iv, 1
40+
%ptr.next = getelementptr inbounds i64, i64* %ptr, i64 1
41+
%cmp = icmp eq i64 %iv, 1024
42+
br i1 %cmp, label %loop2.exit, label %loop2
43+
44+
loop2.exit:
45+
%ptr.next.lcssa = phi i64* [ %ptr.next, %loop2 ]
46+
br label %loop1.latch
47+
48+
loop1.latch:
49+
br label %loop1
50+
}

0 commit comments

Comments
 (0)