Skip to content

Commit 3a4555b

Browse files
committed
Check that induction variable has no unsimplifiable users
Add AArch64 test
1 parent 7b49c33 commit 3a4555b

File tree

4 files changed

+139
-102
lines changed

4 files changed

+139
-102
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 37 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2648,6 +2648,33 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
26482648
return I->second;
26492649
}
26502650

2651+
/// Knowing that loop \p L would be fully unrolled after vectorisation, add
2652+
/// instructions that will get simplified and thus should not have any cost to
2653+
/// \p InstsToIgnore
2654+
static void AddFullyUnrolledInstructionsToIgnore(
2655+
Loop *L, const LoopVectorizationLegality::InductionList &IL,
2656+
SmallPtrSetImpl<Instruction *> &InstsToIgnore) {
2657+
auto *Cmp = L->getLatchCmpInst();
2658+
if (!Cmp)
2659+
return;
2660+
InstsToIgnore.insert(Cmp);
2661+
for (const auto &[IV, IndDesc] : IL) {
2662+
// Get next iteration value of the induction variable
2663+
Instruction *IVInst =
2664+
cast<Instruction>(IV->getIncomingValueForBlock(L->getLoopLatch()));
2665+
bool IsSimplifiedAway = true;
2666+
// Check that this value used only to exit the loop
2667+
for (auto *UIV : IVInst->users()) {
2668+
if (UIV != IV && UIV != Cmp) {
2669+
IsSimplifiedAway = false;
2670+
break;
2671+
}
2672+
}
2673+
if (IsSimplifiedAway)
2674+
InstsToIgnore.insert(IVInst);
2675+
}
2676+
}
2677+
26512678
void InnerLoopVectorizer::createInductionResumeValues(
26522679
const SCEV2ValueTy &ExpandedSCEVs,
26532680
std::pair<BasicBlock *, Value *> AdditionalBypass) {
@@ -5534,19 +5561,13 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
55345561
InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
55355562
InstructionCost Cost;
55365563

5537-
// If with the given VF loop gets fully unrolled, ignore the costs of
5538-
// comparison and induction instructions, as they'll get simplified away
5539-
SmallPtrSet<const Value *, 16> ValuesToIgnoreForVF;
5564+
// If with the given fixed width VF loop gets fully unrolled, ignore the costs
5565+
// of comparison and induction instructions, as they'll get simplified away
5566+
SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
55405567
auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
5541-
auto *Cmp = TheLoop->getLatchCmpInst();
5542-
if (Cmp && TC == VF.getKnownMinValue()) {
5543-
ValuesToIgnoreForVF.insert(Cmp);
5544-
for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
5545-
Instruction *IVInc = cast<Instruction>(
5546-
IV->getIncomingValueForBlock(TheLoop->getLoopLatch()));
5547-
ValuesToIgnoreForVF.insert(IVInc);
5548-
}
5549-
}
5568+
if (VF.isFixed() && TC == VF.getFixedValue())
5569+
AddFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
5570+
ValuesToIgnoreForVF);
55505571

55515572
// For each block.
55525573
for (BasicBlock *BB : TheLoop->blocks()) {
@@ -7240,16 +7261,10 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
72407261

72417262
// If with the given VF loop gets fully unrolled, ignore the costs of
72427263
// comparison and induction instructions, as they'll get simplified away
7243-
auto TC = CM.PSE.getSE()->getSmallConstantTripCount(OrigLoop);
7244-
auto *Cmp = OrigLoop->getLatchCmpInst();
7245-
if (Cmp && TC == VF.getKnownMinValue()) {
7246-
CostCtx.SkipCostComputation.insert(Cmp);
7247-
for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
7248-
Instruction *IVInc = cast<Instruction>(
7249-
IV->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
7250-
CostCtx.SkipCostComputation.insert(IVInc);
7251-
}
7252-
}
7264+
auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop);
7265+
if (VF.isFixed() && TC == VF.getFixedValue())
7266+
AddFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(),
7267+
CostCtx.SkipCostComputation);
72537268

72547269
for (Instruction *IVInst : IVInsts) {
72557270
if (CostCtx.skipCostComputation(IVInst, VF.isVector()))
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; REQUIRES: asserts
2+
; RUN: opt < %s -mcpu=neoverse-v2 -passes=loop-vectorize -debug-only=loop-vectorize -disable-output -S 2>&1 | FileCheck %s
3+
4+
target triple="aarch64--linux-gnu"
5+
6+
; Loop that exits once %indvars.iv.next equals 16. Each iteration loads one i8
; from %a and one from %b at index %indvars.iv, zero-extends both to i64,
; multiplies them and adds the product to the running sum %sum.09; the final
; sum is returned. The FileCheck directives below expect the cost lines for
; %indvars.iv.next and %exitcond.not to be printed for VF 8 but not for VF 16.
define i64 @test(ptr %a, ptr %b) #0 {
7+
; CHECK: LV: Checking a loop in 'test'
8+
; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
9+
; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction: %exitcond.not = icmp eq i64 %indvars.iv.next, 16
10+
; CHECK: LV: Vector loop of width 8 costs: 3.
11+
; CHECK-NOT: LV: Found an estimated cost of 1 for VF 16 For instruction: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
12+
; CHECK-NOT: LV: Found an estimated cost of 1 for VF 16 For instruction: %exitcond.not = icmp eq i64 %indvars.iv.next, 16
13+
; CHECK: LV: Vector loop of width 16 costs: 3.
14+
; CHECK: LV: Selecting VF: 16
15+
entry:
16+
br label %for.body
17+
18+
for.cond.cleanup: ; preds = %for.body
19+
%add.lcssa = phi i64 [ %add, %for.body ]
20+
ret i64 %add.lcssa
21+
22+
for.body: ; preds = %entry, %for.body
23+
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
24+
%sum.09 = phi i64 [ 0, %entry ], [ %add, %for.body ]
25+
%arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
26+
%0 = load i8, ptr %arrayidx, align 1
27+
%conv = zext i8 %0 to i64
28+
%arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
29+
%1 = load i8, ptr %arrayidx2, align 1
30+
%conv3 = zext i8 %1 to i64
31+
%mul = mul nuw nsw i64 %conv3, %conv
32+
%add = add i64 %mul, %sum.09
33+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
34+
%exitcond.not = icmp eq i64 %indvars.iv.next, 16
35+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
36+
}
37+
38+
attributes #0 = { vscale_range(1, 16) "target-features"="+sve" }

0 commit comments

Comments
 (0)