Skip to content

Commit 13feed4

Browse files
committed
[LV] Use SCEV to check if IV overflow check is known
In llvm#111310 an assert was added stating that, for the IV overflow check used with tail folding, the result of the overflow check is never known at compile time. However, when the loop guards are applied, it turns out that we may actually be able to prove that the trip count won't overflow: this occurs in 500.perlbench_r from SPEC CPU 2017 and triggers the assertion: Assertion failed: (!isIndvarOverflowCheckKnownFalse(Cost, VF * UF) && !SE.isKnownPredicate(CmpInst::getInversePredicate(ICmpInst::ICMP_ULT), TC2OverflowSCEV, SE.getSCEV(Step)) && "unexpectedly proved overflow check to be known"), function emitIterationCountCheck, file LoopVectorize.cpp, line 2501. This commit removes the assert and instead replaces the icmp with a constant when the result of the overflow check is known, the same way as is done for the minimum iterations check.
1 parent b2e2d8b commit 13feed4

File tree

2 files changed

+97
-9
lines changed

2 files changed

+97
-9
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2491,17 +2491,20 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
24912491
Value *LHS = Builder.CreateSub(MaxUIntTripCount, Count);
24922492

24932493
Value *Step = CreateStep();
2494-
#ifndef NDEBUG
24952494
ScalarEvolution &SE = *PSE.getSE();
24962495
const SCEV *TC2OverflowSCEV = SE.applyLoopGuards(SE.getSCEV(LHS), OrigLoop);
2497-
assert(
2498-
!isIndvarOverflowCheckKnownFalse(Cost, VF * UF) &&
2499-
!SE.isKnownPredicate(CmpInst::getInversePredicate(ICmpInst::ICMP_ULT),
2500-
TC2OverflowSCEV, SE.getSCEV(Step)) &&
2501-
"unexpectedly proved overflow check to be known");
2502-
#endif
2503-
// Don't execute the vector loop if (UMax - n) < (VF * UF).
2504-
CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, Step);
2496+
const SCEV *StepSCEV = SE.getSCEV(Step);
2497+
2498+
// Check if tc + step is known to exceed maxuint, i.e. (UMax - tc) < step.
2499+
if (SE.isKnownPredicate(ICmpInst::ICMP_ULT, TC2OverflowSCEV, StepSCEV)) {
2500+
CheckMinIters = Builder.getTrue();
2501+
} else if (!SE.isKnownPredicate(
2502+
CmpInst::getInversePredicate(CmpInst::ICMP_ULT),
2503+
TC2OverflowSCEV, StepSCEV)) {
2504+
// Generate the IV overflow check only if we cannot prove the IV won't
2505+
// overflow, or known to always overflow.
2506+
CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, Step);
2507+
} // else tc + step known < maxuint, use CheckMinIters preset to false
25052508
}
25062509

25072510
// Create new preheader for vector loop.
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=loop-vectorize \
3+
; RUN: -force-tail-folding-style=data-with-evl \
4+
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
5+
; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s
6+
7+
; If we know the IV will never overflow then we can skip the IV overflow check
8+
9+
define void @f(ptr %p, i64 %tc) vscale_range(2, 1024) {
10+
; CHECK-LABEL: define void @f(
11+
; CHECK-SAME: ptr [[P:%.*]], i64 [[TC:%.*]]) #[[ATTR0:[0-9]+]] {
12+
; CHECK-NEXT: [[ENTRY:.*:]]
13+
; CHECK-NEXT: [[GUARD:%.*]] = icmp ugt i64 [[TC]], 1024
14+
; CHECK-NEXT: br i1 [[GUARD]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]]
15+
; CHECK: [[LOOP_PREHEADER]]:
16+
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 -1, [[TC]]
17+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
18+
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 2
19+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
20+
; CHECK: [[VECTOR_PH]]:
21+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
22+
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
23+
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], 1
24+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TC]], [[TMP5]]
25+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
26+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
27+
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
28+
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 2
29+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
30+
; CHECK: [[VECTOR_BODY]]:
31+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
32+
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
33+
; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[TC]], [[EVL_BASED_IV]]
34+
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
35+
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[EVL_BASED_IV]], 0
36+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]]
37+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0
38+
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP11]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP8]])
39+
; CHECK-NEXT: [[VP_OP:%.*]] = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP8]])
40+
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP11]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP8]])
41+
; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP8]] to i64
42+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP12]], [[EVL_BASED_IV]]
43+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]]
44+
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
45+
; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
46+
; CHECK: [[MIDDLE_BLOCK]]:
47+
; CHECK-NEXT: br i1 true, label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
48+
; CHECK: [[SCALAR_PH]]:
49+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ]
50+
; CHECK-NEXT: br label %[[LOOP:.*]]
51+
; CHECK: [[LOOP]]:
52+
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
53+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[P]], i64 [[I]]
54+
; CHECK-NEXT: [[X:%.*]] = load i64, ptr [[GEP]], align 8
55+
; CHECK-NEXT: [[Y:%.*]] = add i64 [[X]], 1
56+
; CHECK-NEXT: store i64 [[Y]], ptr [[GEP]], align 8
57+
; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1
58+
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[TC]]
59+
; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
60+
; CHECK: [[EXIT_LOOPEXIT]]:
61+
; CHECK-NEXT: br label %[[EXIT]]
62+
; CHECK: [[EXIT]]:
63+
; CHECK-NEXT: ret void
64+
;
65+
entry:
66+
%guard = icmp ugt i64 %tc, 1024
67+
br i1 %guard, label %exit, label %loop
68+
loop:
69+
%i = phi i64 [%i.next, %loop], [0, %entry]
70+
%gep = getelementptr i64, ptr %p, i64 %i
71+
%x = load i64, ptr %gep
72+
%y = add i64 %x, 1
73+
store i64 %y, ptr %gep
74+
%i.next = add i64 %i, 1
75+
%done = icmp eq i64 %i.next, %tc
76+
br i1 %done, label %exit, label %loop
77+
exit:
78+
ret void
79+
}
80+
;.
81+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
82+
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
83+
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
84+
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
85+
;.

0 commit comments

Comments
 (0)