Skip to content

Commit 720ab3b

Browse files
hassnaaHamdi authored and tstellar committed
[VPlan] Compute cost for binary op VPInstruction with underlying values. llvm#125434
As exposed by llvm#125094, we are missing cost computation for some binary VPInstructions we created based on original IR instructions. Their cost should be considered. PR: llvm#125434 Author: Florian Hahn <[email protected]> Change-Id: Icf985b3f1cd40898a17faaf47b241e2651f9e8dd
1 parent 8e06e0e commit 720ab3b

File tree

3 files changed

+99
-4
lines changed

3 files changed

+99
-4
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1314,10 +1314,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
13141314

13151315
/// Return the cost of this VPInstruction.
13161316
InstructionCost computeCost(ElementCount VF,
1317-
VPCostContext &Ctx) const override {
1318-
// TODO: Compute accurate cost after retiring the legacy cost model.
1319-
return 0;
1320-
}
1317+
VPCostContext &Ctx) const override;
13211318

13221319
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
13231320
/// Print the VPInstruction to \p O.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -703,6 +703,30 @@ Value *VPInstruction::generate(VPTransformState &State) {
703703
}
704704
}
705705

706+
/// Return the cost of this VPInstruction for vectorization factor \p VF.
///
/// Only binary-op VPInstructions that have an underlying IR value are costed,
/// by querying the arithmetic instruction cost from \p Ctx's TTI. Every other
/// VPInstruction currently reports a cost of 0.
InstructionCost VPInstruction::computeCost(ElementCount VF,
                                           VPCostContext &Ctx) const {
  if (Instruction::isBinaryOp(getOpcode())) {
    if (!getUnderlyingValue()) {
      // TODO: Compute cost for VPInstructions without underlying values once
      // the legacy cost model has been retired.
      return 0;
    }

    assert(!doesGeneratePerAllLanes() &&
           "Should only generate a vector value or single scalar, not scalars "
           "for all lanes.");
    // Cost the scalar result type when only the first lane is used; otherwise
    // cost the result type widened to VF.
    Type *ResTy = Ctx.Types.inferScalarType(this);
    if (!vputils::onlyFirstLaneUsed(this))
      ResTy = toVectorTy(ResTy, VF);
    return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind);
  }

  // TODO: Compute cost for other VPInstructions once the legacy cost model has
  // been retired.
  assert(!getUnderlyingValue() &&
         "unexpected VPInstruction with underlying value");
  return 0;
}
729+
706730
bool VPInstruction::isVectorToScalar() const {
707731
return getOpcode() == VPInstruction::ExtractFromEnd ||
708732
getOpcode() == VPInstruction::ComputeReductionResult ||
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of"
2+
; RUN: opt -S -passes=loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -debug -disable-output -S %s 2>&1 | FileCheck %s
3+
4+
; REQUIRES: asserts
5+
6+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
7+
8+
; Loop whose conditionally executed block contains an `or disjoint` (%or) that
; feeds the store address. The CHECK lines below expect it to be reported as
; `EMIT ir<%or> = add ...` with a cost of 1 for both VF 2 and VF 4.
define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst) {
9+
; CHECK-LABEL: 'wide_or_replaced_with_add_vpinstruction'
10+
; CHECK: Cost of 1 for VF 2: induction instruction %iv.next = add nuw nsw i64 %iv, 1
11+
; CHECK: Cost of 0 for VF 2: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
12+
; CHECK: Cost of 1 for VF 2: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
13+
; CHECK: Cost of 0 for VF 2: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
14+
; CHECK: Cost of 0 for VF 2: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
15+
; CHECK: Cost of 0 for VF 2: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
16+
; CHECK: Cost of 0 for VF 2: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4>
17+
; CHECK: Cost of 0 for VF 2: vp<%5> = vector-pointer ir<%g.src>
18+
; CHECK: Cost of 1 for VF 2: WIDEN ir<%l> = load vp<%5>
19+
; CHECK: Cost of 1 for VF 2: WIDEN ir<%iv.4> = add ir<%iv>, ir<4>
20+
; CHECK: Cost of 1 for VF 2: WIDEN ir<%c> = icmp ule ir<%l>, ir<128>
21+
; CHECK: Cost of 1 for VF 2: EMIT ir<%or> = add ir<%iv.4>, ir<1>
22+
; CHECK: Cost of 0 for VF 2: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or>
23+
; CHECK: Cost of 0 for VF 2: vp<%6> = vector-pointer ir<%g.dst>
24+
; CHECK: Cost of 1 for VF 2: WIDEN store vp<%6>, ir<%iv.4>, ir<%c>
25+
; CHECK: Cost of 0 for VF 2: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
26+
; CHECK: Cost of 0 for VF 2: EMIT branch-on-count vp<%index.next>, vp<%2>
27+
; CHECK: Cost of 0 for VF 2: vector loop backedge
28+
; CHECK: Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1
29+
; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
30+
; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
31+
; CHECK: Cost of 0 for VF 4: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
32+
; CHECK: Cost of 0 for VF 4: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
33+
; CHECK: Cost of 0 for VF 4: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
34+
; CHECK: Cost of 0 for VF 4: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4>
35+
; CHECK: Cost of 0 for VF 4: vp<%5> = vector-pointer ir<%g.src>
36+
; CHECK: Cost of 1 for VF 4: WIDEN ir<%l> = load vp<%5>
37+
; CHECK: Cost of 1 for VF 4: WIDEN ir<%iv.4> = add ir<%iv>, ir<4>
38+
; CHECK: Cost of 1 for VF 4: WIDEN ir<%c> = icmp ule ir<%l>, ir<128>
39+
; CHECK: Cost of 1 for VF 4: EMIT ir<%or> = add ir<%iv.4>, ir<1>
40+
; CHECK: Cost of 0 for VF 4: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or>
41+
; CHECK: Cost of 0 for VF 4: vp<%6> = vector-pointer ir<%g.dst>
42+
; CHECK: Cost of 1 for VF 4: WIDEN store vp<%6>, ir<%iv.4>, ir<%c>
43+
; CHECK: Cost of 0 for VF 4: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
44+
; CHECK: Cost of 0 for VF 4: EMIT branch-on-count vp<%index.next>, vp<%2>
45+
; CHECK: Cost of 0 for VF 4: vector loop backedge
46+
; CHECK: Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1
47+
; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
48+
; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
49+
;
50+
entry:
51+
br label %loop.header
52+
53+
loop.header:
54+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
55+
%g.src = getelementptr inbounds i64, ptr %src, i64 %iv
56+
%l = load i64, ptr %g.src
57+
%iv.4 = add nuw nsw i64 %iv, 4
58+
%c = icmp ule i64 %l, 128
59+
br i1 %c, label %loop.then, label %loop.latch
60+
61+
loop.then:
62+
%or = or disjoint i64 %iv.4, 1
63+
%g.dst = getelementptr inbounds i64, ptr %dst, i64 %or
64+
store i64 %iv.4, ptr %g.dst, align 4
65+
br label %loop.latch
66+
67+
loop.latch:
68+
%iv.next = add nuw nsw i64 %iv, 1
69+
%exitcond = icmp eq i64 %iv.next, 32
70+
br i1 %exitcond, label %exit, label %loop.header
71+
72+
exit:
73+
ret void
74+
}

0 commit comments

Comments
 (0)