Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit d2b07a0

Browse files
committed
[LV] Don't vectorize when we have a small static bound on trip count
We currently check whether the exact trip count is known and is smaller than the "tiny loop" bound. We should be checking the maximum bound on the trip count instead, so that a loop whose exact trip count is unknown but whose trip count is provably small is also left unvectorized.

Differential Revision: https://reviews.llvm.org/D27690

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289583 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent e32cf71 commit d2b07a0

File tree

2 files changed

+27
-2
lines changed

2 files changed

+27
-2
lines changed

lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7382,8 +7382,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
73827382

73837383
// Check the loop for a trip count threshold:
73847384
// do not vectorize loops with a tiny trip count.
7385-
const unsigned TC = SE->getSmallConstantTripCount(L);
7386-
if (TC > 0u && TC < TinyTripCountVectorThreshold) {
7385+
const unsigned MaxTC = SE->getSmallConstantMaxTripCount(L);
7386+
if (MaxTC > 0u && MaxTC < TinyTripCountVectorThreshold) {
73877387
DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
73887388
<< "This loop is not worth vectorizing.");
73897389
if (Hints.getForce() == LoopVectorizeHints::FK_Enabled)

test/Transforms/LoopVectorize/small-loop.ll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,28 @@ define void @example1() nounwind uwtable ssp {
3030
ret void
3131
}
3232

33+
;CHECK-LABEL: @bound1(
34+
;CHECK-NOT: load <4 x i32>
35+
;CHECK: ret void
36+
define void @bound1(i32 %k) nounwind uwtable ssp {
37+
br label %1
38+
39+
; <label>:1 ; preds = %1, %0
40+
%indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
41+
%2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
42+
%3 = load i32, i32* %2, align 4
43+
%4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
44+
%5 = load i32, i32* %4, align 4
45+
%6 = add nsw i32 %5, %3
46+
%7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
47+
store i32 %6, i32* %7, align 4
48+
%indvars.iv.next = add i64 %indvars.iv, 1
49+
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
50+
%large = icmp sge i32 %lftr.wideiv, 8
51+
%exitcond = icmp eq i32 %lftr.wideiv, %k
52+
%realexit = or i1 %large, %exitcond
53+
br i1 %realexit, label %8, label %1
54+
55+
; <label>:8 ; preds = %1
56+
ret void
57+
}

0 commit comments

Comments
 (0)