Skip to content

Commit 0b24031

Browse files
authored
[LoopVectorize] In LoopVectorize.cpp start using getSymbolicMaxBackedgeTakenCount (#108833)
LoopVectorizationLegality currently only treats a loop as legal to vectorise if PredicatedScalarEvolution::getBackedgeTakenCount returns a valid SCEV or, more precisely, if the loop has an exact backedge-taken count. For such loops getSymbolicMaxBackedgeTakenCount returns the same SCEV, so in LoopVectorize.cpp we can safely replace all calls to getBackedgeTakenCount with calls to getSymbolicMaxBackedgeTakenCount without changing the result. This also helps prepare the loop vectoriser for PR #88385.
1 parent 3717048 commit 0b24031

File tree

3 files changed

+67
-5
lines changed

3 files changed

+67
-5
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4054,7 +4054,13 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
40544054
unsigned MaxVFtimesIC =
40554055
UserIC ? *MaxPowerOf2RuntimeVF * UserIC : *MaxPowerOf2RuntimeVF;
40564056
ScalarEvolution *SE = PSE.getSE();
4057-
const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
4057+
// Currently only loops with countable exits are vectorized, but calling
4058+
// getSymbolicMaxBackedgeTakenCount allows enablement work for loops with
4059+
// uncountable exits whilst also ensuring the symbolic maximum and known
4060+
// back-edge taken count remain identical for loops with countable exits.
4061+
const SCEV *BackedgeTakenCount = PSE.getSymbolicMaxBackedgeTakenCount();
4062+
assert(BackedgeTakenCount == PSE.getBackedgeTakenCount() &&
4063+
"Invalid loop count");
40584064
const SCEV *ExitCount = SE->getAddExpr(
40594065
BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType()));
40604066
const SCEV *Rem = SE->getURemExpr(

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -881,11 +881,18 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
881881
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader);
882882

883883
// Create SCEV and VPValue for the trip count.
884-
const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
885-
assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && "Invalid loop count");
884+
885+
// Currently only loops with countable exits are vectorized, but calling
886+
// getSymbolicMaxBackedgeTakenCount allows enablement work for loops with
887+
// uncountable exits whilst also ensuring the symbolic maximum and known
888+
// back-edge taken count remain identical for loops with countable exits.
889+
const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
890+
assert((!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
891+
BackedgeTakenCountSCEV == PSE.getBackedgeTakenCount()) &&
892+
"Invalid loop count");
886893
ScalarEvolution &SE = *PSE.getSE();
887-
const SCEV *TripCount =
888-
SE.getTripCountFromExitCount(BackedgeTakenCount, InductionTy, TheLoop);
894+
const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
895+
InductionTy, TheLoop);
889896
Plan->TripCount =
890897
vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE);
891898

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; REQUIRES: asserts
2+
; RUN: opt -S -passes=loop-vectorize -enable-vplan-native-path -disable-output -debug 2>&1 < %s | FileCheck %s
3+
4+
; CHECK-LABEL: LV: Found a loop: for.body
5+
; CHECK: LV: Not vectorizing: Unsupported conditional branch.
6+
; CHECK: loop not vectorized: loop control flow is not understood by vectorizer
7+
; CHECK: LV: Not vectorizing: Unsupported outer loop.
8+
9+
@arr2 = external global [8 x i32], align 16
10+
@arr = external global [8 x [8 x i32]], align 16
11+
12+
define i32 @foo(i32 %n) {
13+
entry:
14+
br label %for.body
15+
16+
for.body:
17+
%iv.outer = phi i64 [ 0, %entry ], [%iv.outer.next, %for.inc ]
18+
%arrayidx = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, i64 %iv.outer
19+
%ld1 = load i32, ptr %arrayidx, align 4
20+
%0 = trunc i64 %iv.outer to i32
21+
store i32 %0, ptr %arrayidx, align 4
22+
%1 = trunc i64 %iv.outer to i32
23+
%add = add nsw i32 %1, %n
24+
%cmp.early = icmp eq i32 %ld1, 3
25+
br i1 %cmp.early, label %for.early, label %for.body.inner
26+
27+
for.body.inner:
28+
%iv.inner = phi i64 [ 0, %for.body ], [ %iv.inner.next, %for.body.inner ]
29+
%arrayidx7 = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, i64 %iv.inner, i64 %iv.outer
30+
store i32 %add, ptr %arrayidx7, align 4
31+
%iv.inner.next = add nuw nsw i64 %iv.inner, 1
32+
%cmp.inner = icmp eq i64 %iv.inner.next, 8
33+
br i1 %cmp.inner, label %for.inc, label %for.body.inner
34+
35+
for.inc:
36+
%iv.outer.next = add nuw nsw i64 %iv.outer, 1
37+
%cmp.outer = icmp eq i64%iv.outer.next, 8
38+
br i1 %cmp.outer, label %for.end, label %for.body, !llvm.loop !1
39+
40+
for.early:
41+
ret i32 1
42+
43+
for.end:
44+
ret i32 0
45+
}
46+
47+
!1 = distinct !{!1, !2, !3}
48+
!2 = !{!"llvm.loop.vectorize.width", i32 4}
49+
!3 = !{!"llvm.loop.vectorize.enable", i1 true}

0 commit comments

Comments
 (0)