Skip to content

Commit 079c488

Browse files
committed
[TTI][AArch64] Cost model insertelement and indexed LD1 instructions
An indexed LD1 instruction, or "ASIMD load, 1 element, one lane, B/H/S" instruction, which loads a value and inserts it as an element into a vector, is an expensive instruction: it has a latency of 8 on modern cores. We generate an indexed LD1 when an insertelement instruction has a load as an operand; this patch recognises that pattern and makes the indexed LD1 more expensive. Differential Revision: https://reviews.llvm.org/D141602
1 parent ec094d2 commit 079c488

File tree

3 files changed

+24
-16
lines changed

3 files changed

+24
-16
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2184,7 +2184,8 @@ InstructionCost AArch64TTIImpl::getCFInstrCost(unsigned Opcode,
21842184
return 0;
21852185
}
21862186

2187-
InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(Type *Val,
2187+
InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(const Instruction *I,
2188+
Type *Val,
21882189
unsigned Index,
21892190
bool HasRealUse) {
21902191
assert(Val->isVectorTy() && "This must be a vector type");
@@ -2210,14 +2211,21 @@ InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(Type *Val,
22102211
// needed. So it has non-zero cost.
22112212
// - For the rest of cases (virtual instruction or element type is float),
22122213
// consider the instruction free.
2213-
//
2214+
if (Index == 0 && (!HasRealUse || !Val->getScalarType()->isIntegerTy()))
2215+
return 0;
2216+
2217+
// This is recognising a LD1 single-element structure to one lane of one
2218+
// register instruction. I.e., if this is an `insertelement` instruction,
2219+
// and its second operand is a load, then we will generate a LD1, which
2220+
// is an expensive instruction.
2221+
if (I && dyn_cast<LoadInst>(I->getOperand(1)))
2222+
return ST->getVectorInsertExtractBaseCost() + 1;
2223+
22142224
// FIXME:
22152225
// If the extract-element and insert-element instructions could be
22162226
// simplified away (e.g., could be combined into users by looking at use-def
22172227
// context), they have no cost. This is not done in the first place for
22182228
// compile-time considerations.
2219-
if (Index == 0 && (!HasRealUse || !Val->getScalarType()->isIntegerTy()))
2220-
return 0;
22212229
}
22222230

22232231
// All other insert/extracts cost this much.
@@ -2228,14 +2236,14 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
22282236
TTI::TargetCostKind CostKind,
22292237
unsigned Index, Value *Op0,
22302238
Value *Op1) {
2231-
return getVectorInstrCostHelper(Val, Index, false /* HasRealUse */);
2239+
return getVectorInstrCostHelper(nullptr, Val, Index, false /* HasRealUse */);
22322240
}
22332241

22342242
InstructionCost AArch64TTIImpl::getVectorInstrCost(const Instruction &I,
22352243
Type *Val,
22362244
TTI::TargetCostKind CostKind,
22372245
unsigned Index) {
2238-
return getVectorInstrCostHelper(Val, Index, true /* HasRealUse */);
2246+
return getVectorInstrCostHelper(&I, Val, Index, true /* HasRealUse */);
22392247
}
22402248

22412249
InstructionCost AArch64TTIImpl::getArithmeticInstrCost(

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
6565
// 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse'
6666
// indicates whether the vector instruction is available in the input IR or
6767
// just imaginary in vectorizer passes.
68-
InstructionCost getVectorInstrCostHelper(Type *Val, unsigned Index,
69-
bool HasRealUse);
68+
InstructionCost getVectorInstrCostHelper(const Instruction *I, Type *Val,
69+
unsigned Index, bool HasRealUse);
7070

7171
public:
7272
explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)

llvm/test/Analysis/CostModel/AArch64/insert-extract.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -108,12 +108,12 @@ define void @vectorInstrCost() {
108108
define <8 x i8> @LD1_B(<8 x i8> %vec, ptr noundef %i) {
109109
; KRYO-LABEL: 'LD1_B'
110110
; KRYO-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i8, ptr %i, align 1
111-
; KRYO-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
111+
; KRYO-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
112112
; KRYO-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %v2
113113
;
114114
; NEO-LABEL: 'LD1_B'
115115
; NEO-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i8, ptr %i, align 1
116-
; NEO-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
116+
; NEO-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
117117
; NEO-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %v2
118118
;
119119
entry:
@@ -125,12 +125,12 @@ entry:
125125
define <4 x i16> @LD1_H(<4 x i16> %vec, ptr noundef %i) {
126126
; KRYO-LABEL: 'LD1_H'
127127
; KRYO-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i16, ptr %i, align 2
128-
; KRYO-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
128+
; KRYO-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
129129
; KRYO-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %v2
130130
;
131131
; NEO-LABEL: 'LD1_H'
132132
; NEO-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i16, ptr %i, align 2
133-
; NEO-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
133+
; NEO-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
134134
; NEO-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %v2
135135
;
136136
entry:
@@ -142,12 +142,12 @@ entry:
142142
define <4 x i32> @LD1_W(<4 x i32> %vec, ptr noundef %i) {
143143
; KRYO-LABEL: 'LD1_W'
144144
; KRYO-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i32, ptr %i, align 4
145-
; KRYO-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
145+
; KRYO-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
146146
; KRYO-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %v2
147147
;
148148
; NEO-LABEL: 'LD1_W'
149149
; NEO-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i32, ptr %i, align 4
150-
; NEO-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
150+
; NEO-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
151151
; NEO-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %v2
152152
;
153153
entry:
@@ -159,12 +159,12 @@ entry:
159159
define <2 x i64> @LD1_X(<2 x i64> %vec, ptr noundef %i) {
160160
; KRYO-LABEL: 'LD1_X'
161161
; KRYO-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i64, ptr %i, align 8
162-
; KRYO-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
162+
; KRYO-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
163163
; KRYO-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %v2
164164
;
165165
; NEO-LABEL: 'LD1_X'
166166
; NEO-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i64, ptr %i, align 8
167-
; NEO-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
167+
; NEO-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
168168
; NEO-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %v2
169169
;
170170
entry:

0 commit comments

Comments
 (0)