Skip to content

Commit cd08fad

Browse files
committed
[LV] Include chains feeding inductions in cost precomputation.
Include the chain of operations feeding inductions in the induction cost precomputation, not just the induction increment. In VPlan, those instructions will be cleaned up, as both the phi and the increment are generated by VPWidenIntOrFpInductionRecipe independently. Fixes llvm#101337.
1 parent 1b936e4 commit cd08fad

File tree

2 files changed

+80
-1
lines changed

2 files changed

+80
-1
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7080,7 +7080,16 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
70807080
for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
70817081
Instruction *IVInc = cast<Instruction>(
70827082
IV->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
7083-
SmallVector<Instruction *> IVInsts = {IV, IVInc};
7083+
SmallVector<Instruction *> IVInsts = {IVInc};
7084+
for (unsigned I = 0; I != IVInsts.size(); I++) {
7085+
for (Value *Op : IVInsts[I]->operands()) {
7086+
auto *OpI = dyn_cast<Instruction>(Op);
7087+
if (Op == IV || !OpI || !OrigLoop->contains(OpI) || !Op->hasOneUse())
7088+
continue;
7089+
IVInsts.push_back(OpI);
7090+
}
7091+
}
7092+
IVInsts.push_back(IV);
70847093
for (User *U : IV->users()) {
70857094
auto *CI = cast<Instruction>(U);
70867095
if (!CostCtx.CM.isOptimizableIVTruncate(CI, VF))

llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll

Lines changed: 70 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -650,7 +650,77 @@ exit:
650650
ret void
651651
}
652652

653+
define void @wombat(i32 %arg, ptr %dst) #1 {
654+
entry:
655+
%mul = mul i32 %arg, 3
656+
%zext = zext i32 %arg to i64
657+
br label %loop
658+
659+
loop:
660+
%phi = phi i64 [ 4, %entry ], [ %add, %loop ]
661+
%phi2 = phi i32 [ %mul, %entry ], [ %trunc, %loop ]
662+
%getelementptr = getelementptr i32, ptr %dst, i64 %phi
663+
%and = and i32 %phi2, 12
664+
store i32 %and, ptr %getelementptr, align 4
665+
%mul3 = mul i64 %phi, %zext
666+
%add = add i64 %phi, 1
667+
%icmp = icmp ugt i64 %phi, 65
668+
%trunc = trunc i64 %mul3 to i32
669+
br i1 %icmp, label %exit, label %loop
670+
671+
exit:
672+
ret void
673+
}
674+
675+
define void @wombat2(i32 %arg, ptr %dst) #1 {
676+
entry:
677+
%mul = mul i32 %arg, 3
678+
%zext = zext i32 %arg to i64
679+
br label %loop
680+
681+
loop:
682+
%phi = phi i64 [ 4, %entry ], [ %add, %loop ]
683+
%phi2 = phi i32 [ %mul, %entry ], [ %trunc.1, %loop ]
684+
%getelementptr = getelementptr i32, ptr %dst, i64 %phi
685+
%and = and i32 %phi2, 12
686+
store i32 %and, ptr %getelementptr, align 4
687+
%mul3 = mul i64 %phi, %zext
688+
%add = add i64 %phi, 1
689+
%icmp = icmp ugt i64 %phi, 65
690+
%trunc.0 = trunc i64 %mul3 to i60
691+
%trunc.1 = trunc i60 %trunc.0 to i32
692+
br i1 %icmp, label %exit, label %loop
693+
694+
exit:
695+
ret void
696+
}
697+
698+
699+
define void @with_dead_use(i32 %arg, ptr %dst) #1 {
700+
entry:
701+
%mul = mul i32 %arg, 3
702+
%zext = zext i32 %arg to i64
703+
br label %loop
704+
705+
loop:
706+
%phi = phi i64 [ 4, %entry ], [ %add, %loop ]
707+
%phi2 = phi i32 [ %mul, %entry ], [ %trunc, %loop ]
708+
%getelementptr = getelementptr i32, ptr %dst, i64 %phi
709+
%and = and i32 %phi2, 12
710+
store i32 %and, ptr %getelementptr, align 4
711+
%mul3 = mul i64 %phi, %zext
712+
%add = add i64 %phi, 1
713+
%icmp = icmp ugt i64 %phi, 65
714+
%trunc = trunc i64 %mul3 to i32
715+
%dead.and = and i32 %trunc, 123
716+
br i1 %icmp, label %exit, label %loop
717+
718+
exit:
719+
ret void
720+
}
721+
653722
attributes #0 = { "min-legal-vector-width"="0" "target-cpu"="skylake-avx512" }
723+
attributes #1 = { "target-cpu"="skylake-avx512" "target-features"="-avx512f" }
654724
;.
655725
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
656726
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}

0 commit comments

Comments
 (0)