Skip to content

Commit ab05ab5

Browse files
committed
[CostModel][AMDGPU] Fix instruction cost estimation for vector types.
1. Fixed an inconsistency in vector instruction cost estimation: removed the separate logic for "not simple types", since it biased the costs for these types. 2. Fixed the legalization penalty for vectors too big for the target: the penalty is now added to the default legalization cost estimate instead of overwriting it. 3. Fixed a few typos in tests. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D114893
1 parent 266a66c commit ab05ab5

File tree

11 files changed

+259
-169
lines changed

11 files changed

+259
-169
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -519,57 +519,6 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost(
519519
TTI::OperandValueProperties Opd1PropInfo,
520520
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
521521
const Instruction *CxtI) {
522-
EVT OrigTy = TLI->getValueType(DL, Ty);
523-
if (!OrigTy.isSimple()) {
524-
// FIXME: We're having to query the throughput cost so that the basic
525-
// implementation tries to generate legalize and scalarization costs. Maybe
526-
// we could hoist the scalarization code here?
527-
if (CostKind != TTI::TCK_CodeSize)
528-
return BaseT::getArithmeticInstrCost(Opcode, Ty, TTI::TCK_RecipThroughput,
529-
Opd1Info, Opd2Info, Opd1PropInfo,
530-
Opd2PropInfo, Args, CxtI);
531-
// Scalarization
532-
533-
// Check if any of the operands are vector operands.
534-
int ISD = TLI->InstructionOpcodeToISD(Opcode);
535-
assert(ISD && "Invalid opcode");
536-
537-
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
538-
539-
bool IsFloat = Ty->isFPOrFPVectorTy();
540-
// Assume that floating point arithmetic operations cost twice as much as
541-
// integer operations.
542-
unsigned OpCost = (IsFloat ? 2 : 1);
543-
544-
if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
545-
// The operation is legal. Assume it costs 1.
546-
// TODO: Once we have extract/insert subvector cost we need to use them.
547-
return LT.first * OpCost;
548-
}
549-
550-
if (!TLI->isOperationExpand(ISD, LT.second)) {
551-
// If the operation is custom lowered, then assume that the code is twice
552-
// as expensive.
553-
return LT.first * 2 * OpCost;
554-
}
555-
556-
// Else, assume that we need to scalarize this op.
557-
// TODO: If one of the types get legalized by splitting, handle this
558-
// similarly to what getCastInstrCost() does.
559-
if (auto *VTy = dyn_cast<VectorType>(Ty)) {
560-
unsigned Num = cast<FixedVectorType>(VTy)->getNumElements();
561-
InstructionCost Cost = getArithmeticInstrCost(
562-
Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info,
563-
Opd1PropInfo, Opd2PropInfo, Args, CxtI);
564-
// Return the cost of multiple scalar invocation plus the cost of
565-
// inserting and extracting the values.
566-
SmallVector<Type *> Tys(Args.size(), Ty);
567-
return getScalarizationOverhead(VTy, Args, Tys) + Num * Cost;
568-
}
569-
570-
// We don't know anything about this scalar instruction.
571-
return OpCost;
572-
}
573522

574523
// Legalize the type.
575524
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12477,6 +12477,6 @@ SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,
1247712477
if (Size <= 256)
1247812478
return Cost;
1247912479

12480-
Cost.first = (Size + 255) / 256;
12480+
Cost.first += (Size + 255) / 256;
1248112481
return Cost;
1248212482
}

llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ define amdgpu_kernel void @add_i32() #0 {
1515
; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
1616
; ALL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
1717
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
18-
; ALL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v32i32 = add <32 x i32> undef, undef
18+
; ALL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9i32 = add <9 x i32> undef, undef
1919
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
2020
;
2121
; ALL-SIZE-LABEL: 'add_i32'
@@ -27,7 +27,7 @@ define amdgpu_kernel void @add_i32() #0 {
2727
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
2828
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
2929
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
30-
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v32i32 = add <32 x i32> undef, undef
30+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9i32 = add <9 x i32> undef, undef
3131
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
3232
;
3333
%i32 = add i32 undef, undef
@@ -38,7 +38,7 @@ define amdgpu_kernel void @add_i32() #0 {
3838
%v6i32 = add <6 x i32> undef, undef
3939
%v7i32 = add <7 x i32> undef, undef
4040
%v8i32 = add <8 x i32> undef, undef
41-
%v32i32 = add <32 x i32> undef, undef
41+
%v9i32 = add <9 x i32> undef, undef
4242
ret void
4343
}
4444

@@ -48,34 +48,22 @@ define amdgpu_kernel void @add_i64() #0 {
4848
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef
4949
; ALL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef
5050
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef
51-
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = add <5 x i64> undef, undef
52-
; ALL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v6i64 = add <6 x i64> undef, undef
53-
; ALL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v7i64 = add <7 x i64> undef, undef
54-
; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = add <8 x i64> undef, undef
55-
; ALL-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef
51+
; ALL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = add <5 x i64> undef, undef
5652
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
5753
;
5854
; ALL-SIZE-LABEL: 'add_i64'
5955
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = add i64 undef, undef
6056
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef
6157
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef
6258
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef
63-
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = add <5 x i64> undef, undef
64-
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v6i64 = add <6 x i64> undef, undef
65-
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v7i64 = add <7 x i64> undef, undef
66-
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = add <8 x i64> undef, undef
67-
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef
59+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = add <5 x i64> undef, undef
6860
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
6961
;
7062
%i64 = add i64 undef, undef
7163
%v2i64 = add <2 x i64> undef, undef
7264
%v3i64 = add <3 x i64> undef, undef
7365
%v4i64 = add <4 x i64> undef, undef
7466
%v5i64 = add <5 x i64> undef, undef
75-
%v6i64 = add <6 x i64> undef, undef
76-
%v7i64 = add <7 x i64> undef, undef
77-
%v8i64 = add <8 x i64> undef, undef
78-
%v16i64 = add <16 x i64> undef, undef
7967
ret void
8068
}
8169

@@ -87,6 +75,8 @@ define amdgpu_kernel void @add_i16() #0 {
8775
; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef
8876
; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
8977
; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
78+
; FAST16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef
79+
; FAST16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17i16 = add <17 x i16> undef, undef
9080
; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
9181
;
9282
; SLOW16-LABEL: 'add_i16'
@@ -96,6 +86,8 @@ define amdgpu_kernel void @add_i16() #0 {
9686
; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef
9787
; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef
9888
; SLOW16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef
89+
; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = add <16 x i16> undef, undef
90+
; SLOW16-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17i16 = add <17 x i16> undef, undef
9991
; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
10092
;
10193
; FAST16-SIZE-LABEL: 'add_i16'
@@ -105,6 +97,8 @@ define amdgpu_kernel void @add_i16() #0 {
10597
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef
10698
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
10799
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
100+
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef
101+
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17i16 = add <17 x i16> undef, undef
108102
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
109103
;
110104
; SLOW16-SIZE-LABEL: 'add_i16'
@@ -114,6 +108,8 @@ define amdgpu_kernel void @add_i16() #0 {
114108
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef
115109
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef
116110
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef
111+
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = add <16 x i16> undef, undef
112+
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17i16 = add <17 x i16> undef, undef
117113
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
118114
;
119115
%i16 = add i16 undef, undef
@@ -122,6 +118,8 @@ define amdgpu_kernel void @add_i16() #0 {
122118
%v4i16 = add <4 x i16> undef, undef
123119
%v5i16 = add <5 x i16> undef, undef
124120
%v6i16 = add <6 x i16> undef, undef
121+
%v16i16 = add <16 x i16> undef, undef
122+
%v17i16 = add <17 x i16> undef, undef
125123
ret void
126124
}
127125

@@ -133,6 +131,8 @@ define amdgpu_kernel void @add_i8() #0 {
133131
; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef
134132
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef
135133
; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef
134+
; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8 = add <32 x i8> undef, undef
135+
; ALL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v33i8 = add <33 x i8> undef, undef
136136
; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
137137
;
138138
; ALL-SIZE-LABEL: 'add_i8'
@@ -142,6 +142,8 @@ define amdgpu_kernel void @add_i8() #0 {
142142
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef
143143
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef
144144
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef
145+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v32i8 = add <32 x i8> undef, undef
146+
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v33i8 = add <33 x i8> undef, undef
145147
; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
146148
;
147149
%i8 = add i8 undef, undef
@@ -150,12 +152,14 @@ define amdgpu_kernel void @add_i8() #0 {
150152
%v4i8 = add <4 x i8> undef, undef
151153
%v5i8 = add <5 x i8> undef, undef
152154
%v6i8 = add <6 x i8> undef, undef
155+
%v32i8 = add <32 x i8> undef, undef
156+
%v33i8 = add <33 x i8> undef, undef
153157
ret void
154158
}
155159

156160
define amdgpu_kernel void @sub() #0 {
157161
; FAST16-LABEL: 'sub'
158-
; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
162+
; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
159163
; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
160164
; FAST16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
161165
; FAST16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -165,7 +169,7 @@ define amdgpu_kernel void @sub() #0 {
165169
; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
166170
;
167171
; SLOW16-LABEL: 'sub'
168-
; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
172+
; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
169173
; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
170174
; SLOW16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
171175
; SLOW16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -175,7 +179,7 @@ define amdgpu_kernel void @sub() #0 {
175179
; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
176180
;
177181
; FAST16-SIZE-LABEL: 'sub'
178-
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
182+
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
179183
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
180184
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
181185
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -185,7 +189,7 @@ define amdgpu_kernel void @sub() #0 {
185189
; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
186190
;
187191
; SLOW16-SIZE-LABEL: 'sub'
188-
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
192+
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
189193
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
190194
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
191195
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -194,7 +198,7 @@ define amdgpu_kernel void @sub() #0 {
194198
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = sub <4 x i16> undef, undef
195199
; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
196200
;
197-
%i8 = sub i16 undef, undef
201+
%i8 = sub i8 undef, undef
198202
%i16 = sub i16 undef, undef
199203
%i32 = sub i32 undef, undef
200204
%i64 = sub i64 undef, undef

0 commit comments

Comments
 (0)