Skip to content

Commit ecb4171

Browse files
committed
[LV] Handle zero cost loops in selectInterleaveCount.
In some cases, like in the added test case, we can reach selectInterleaveCount with loops that actually have a cost of 0. Unfortunately, a loop cost of 0 is also used to communicate that the cost has not been computed yet. To resolve the crash, bail out if the cost remains zero after computing it. This seems like the best option, as there are multiple code paths that return a cost of 0 to force a computation in selectInterleaveCount. Computing the cost in multiple places up front would unnecessarily complicate the logic. Fixes llvm#54413.
1 parent d1d34ba commit ecb4171

File tree

2 files changed

+62
-10
lines changed

2 files changed

+62
-10
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5695,6 +5695,18 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
56955695
!(InterleaveSmallLoopScalarReduction && HasReductions && VF.isScalar()))
56965696
return 1;
56975697

5698+
// If we did not calculate the cost for VF (because the user selected the VF)
5699+
// then we calculate the cost of VF here.
5700+
if (LoopCost == 0) {
5701+
InstructionCost C = expectedCost(VF).first;
5702+
assert(C.isValid() && "Expected to have chosen a VF with valid cost");
5703+
LoopCost = *C.getValue();
5704+
5705+
// Loop body is free and there is no need for interleaving.
5706+
if (LoopCost == 0)
5707+
return 1;
5708+
}
5709+
56985710
RegisterUsage R = calculateRegisterUsage({VF})[0];
56995711
// We divide by these constants so assume that we have at least one
57005712
// instruction that uses at least one register.
@@ -5786,16 +5798,6 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
57865798

57875799
assert(IC > 0 && "Interleave count must be greater than 0.");
57885800

5789-
// If we did not calculate the cost for VF (because the user selected the VF)
5790-
// then we calculate the cost of VF here.
5791-
if (LoopCost == 0) {
5792-
InstructionCost C = expectedCost(VF).first;
5793-
assert(C.isValid() && "Expected to have chosen a VF with valid cost");
5794-
LoopCost = *C.getValue();
5795-
}
5796-
5797-
assert(LoopCost && "Non-zero loop cost expected");
5798-
57995801
// Interleave if we vectorized this loop and there is a reduction that could
58005802
// benefit from interleaving.
58015803
if (VF.isVector() && HasReductions) {
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -passes='loop(indvars),loop-vectorize' -S %s | FileCheck %s
3+
4+
target triple = "x86_64-unknown-linux-gnu"
5+
6+
; After indvars, the backedge taken count for %loop2 becomes 1, but SCEV
7+
; retains the cached original BTC, as the loop is in dead code. Make sure
8+
; LV does not crash when trying to select an interleave count for a loop with zero cost.
9+
define void @pr54413(i64* %ptr.base) {
10+
; CHECK-LABEL: @pr54413(
11+
; CHECK-NEXT: entry:
12+
; CHECK-NEXT: br label [[LOOP1:%.*]]
13+
; CHECK: loop1:
14+
; CHECK-NEXT: br i1 true, label [[LOOP1_LATCH:%.*]], label [[LOOP2_PREHEADER:%.*]]
15+
; CHECK: loop2.preheader:
16+
; CHECK-NEXT: br label [[LOOP2:%.*]]
17+
; CHECK: loop2:
18+
; CHECK-NEXT: [[PTR:%.*]] = phi i64* [ [[PTR_BASE:%.*]], [[LOOP2_PREHEADER]] ], [ [[PTR_NEXT:%.*]], [[LOOP2]] ]
19+
; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
20+
; CHECK-NEXT: br i1 true, label [[LOOP2_EXIT:%.*]], label [[LOOP2]]
21+
; CHECK: loop2.exit:
22+
; CHECK-NEXT: [[PTR_NEXT_LCSSA:%.*]] = phi i64* [ [[PTR_NEXT]], [[LOOP2]] ]
23+
; CHECK-NEXT: br label [[LOOP1_LATCH]]
24+
; CHECK: loop1.latch:
25+
; CHECK-NEXT: br label [[LOOP1]]
26+
;
27+
entry:
28+
br label %loop1
29+
30+
loop1:
31+
br i1 true, label %loop1.latch, label %loop2.preheader
32+
33+
loop2.preheader:
34+
br label %loop2
35+
36+
loop2:
37+
%iv = phi i64 [ 0, %loop2.preheader ], [ %iv.next, %loop2 ]
38+
%ptr = phi i64* [ %ptr.base, %loop2.preheader ], [ %ptr.next, %loop2 ]
39+
%iv.next = add nuw nsw i64 %iv, 1
40+
%ptr.next = getelementptr inbounds i64, i64* %ptr, i64 1
41+
%cmp = icmp eq i64 %iv, 1024
42+
br i1 %cmp, label %loop2.exit, label %loop2
43+
44+
loop2.exit:
45+
%ptr.next.lcssa = phi i64* [ %ptr.next, %loop2 ]
46+
br label %loop1.latch
47+
48+
loop1.latch:
49+
br label %loop1
50+
}

0 commit comments

Comments
 (0)