Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit a913b4a

Browse files
committed
[LV] Account for predicated stores in instruction costs
This patch ensures that we scale the estimated cost of predicated stores by block probability. This is a follow-on patch for r284123.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@284126 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 2061c51 commit a913b4a

File tree

2 files changed

+45
-1
lines changed

2 files changed

+45
-1
lines changed

lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6700,6 +6700,12 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
67006700
// we might create due to scalarization.
67016701
Cost += getScalarizationOverhead(I, VF, TTI);
67026702

6703+
// If we have a predicated store, it may not be executed for each vector
6704+
// lane. Scale the cost by the probability of executing the predicated
6705+
// block.
6706+
if (Legal->isScalarWithPredication(I))
6707+
Cost /= getReciprocalPredBlockProb();
6708+
67036709
return Cost;
67046710
}
67056711

test/Transforms/LoopVectorize/AArch64/predication_costs.ll

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; REQUIRES: asserts
2-
; RUN: opt < %s -force-vector-width=2 -loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s
2+
; RUN: opt < %s -force-vector-width=2 -enable-cond-stores-vec -loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s
33

44
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
55
target triple = "aarch64--linux-gnu"
@@ -51,3 +51,41 @@ for.end:
5151
%tmp7 = phi i32 [ %tmp6, %for.inc ]
5252
ret i32 %tmp7
5353
}
54+
55+
; CHECK-LABEL: predicated_store
56+
;
57+
; This test checks that we correctly compute the cost of the predicated store
58+
; instruction. If we assume the block probability is 50%, we compute the cost
59+
; as:
60+
;
61+
; Cost for vector lane zero:
62+
; (store(2) + 2 * extractelement(0)) / 2 = 1
63+
; Cost for vector lane one:
64+
; (store(2) + 2 * extractelement(3)) / 2 = 4
65+
;
66+
; CHECK: Found an estimated cost of 5 for VF 2 For instruction: store i32 %tmp2, i32* %tmp0, align 4
67+
; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %tmp0, align 4
68+
;
69+
define void @predicated_store(i32* %a, i1 %c, i32 %x, i64 %n) {
70+
entry:
71+
br label %for.body
72+
73+
for.body:
74+
%i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ]
75+
%tmp0 = getelementptr inbounds i32, i32* %a, i64 %i
76+
%tmp1 = load i32, i32* %tmp0, align 4
77+
br i1 %c, label %if.then, label %for.inc
78+
79+
if.then:
80+
%tmp2 = add nsw i32 %tmp1, %x
81+
store i32 %tmp2, i32* %tmp0, align 4
82+
br label %for.inc
83+
84+
for.inc:
85+
%i.next = add nuw nsw i64 %i, 1
86+
%cond = icmp slt i64 %i.next, %n
87+
br i1 %cond, label %for.body, label %for.end
88+
89+
for.end:
90+
ret void
91+
}

0 commit comments

Comments
 (0)