Skip to content

Commit d84d3f2

Browse files
committed
compute approx default expansion cost in expand case
1 parent a7880e6 commit d84d3f2

19 files changed

+1573
-1964
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2339,6 +2339,47 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
23392339
thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);
23402340
}
23412341

2342+
if (IID == Intrinsic::sadd_sat || IID == Intrinsic::ssub_sat) {
2343+
// Assume a default expansion.
2344+
Type *CondTy = RetTy->getWithNewBitWidth(1);
2345+
2346+
Type *OpTy = StructType::create({RetTy, CondTy});
2347+
Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
2348+
? Intrinsic::sadd_with_overflow
2349+
: Intrinsic::ssub_with_overflow;
2350+
CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
2351+
2352+
// SatMax -> Overflow && SumDiff < 0
2353+
// SatMin -> Overflow && SumDiff >= 0
2354+
InstructionCost Cost = 0;
2355+
IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
2356+
nullptr, ScalarizationCostPassed);
2357+
Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
2358+
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
2359+
Pred, CostKind);
2360+
Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
2361+
CondTy, Pred, CostKind);
2362+
return Cost;
2363+
}
2364+
2365+
if (IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat) {
2366+
Type *CondTy = RetTy->getWithNewBitWidth(1);
2367+
2368+
Type *OpTy = StructType::create({RetTy, CondTy});
2369+
Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
2370+
? Intrinsic::uadd_with_overflow
2371+
: Intrinsic::usub_with_overflow;
2372+
2373+
InstructionCost Cost = 0;
2374+
IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
2375+
nullptr, ScalarizationCostPassed);
2376+
Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
2377+
Cost +=
2378+
thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
2379+
CmpInst::BAD_ICMP_PREDICATE, CostKind);
2380+
return Cost;
2381+
}
2382+
23422383
// Else, assume that we need to scalarize this intrinsic. For math builtins
23432384
// this will emit a costly libcall, adding call overhead and spills. Make it
23442385
// very expensive.

llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -33,23 +33,23 @@ declare <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8>, <64 x i8>)
3333

3434
define i32 @add(i32 %arg) {
3535
; RECIP-LABEL: 'add'
36-
; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
36+
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
3737
; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
3838
; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
3939
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
40-
; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
40+
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
4141
; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
4242
; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
4343
; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
4444
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
4545
; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V3I32 = call <3 x i32> @llvm.sadd.sat.v3i32(<3 x i32> undef, <3 x i32> undef)
46-
; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
46+
; RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
4747
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
4848
; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
4949
; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
5050
; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
5151
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
52-
; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
52+
; RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
5353
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
5454
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
5555
; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
@@ -59,23 +59,23 @@ define i32 @add(i32 %arg) {
5959
; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
6060
;
6161
; SIZE-LABEL: 'add'
62-
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
62+
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
6363
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
6464
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
6565
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
66-
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
66+
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
6767
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
6868
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
6969
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
7070
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
7171
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V3I32 = call <3 x i32> @llvm.sadd.sat.v3i32(<3 x i32> undef, <3 x i32> undef)
72-
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
72+
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
7373
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
7474
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
7575
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
7676
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
7777
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
78-
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
78+
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
7979
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
8080
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
8181
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
@@ -142,22 +142,22 @@ declare <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8>, <64 x i8>)
142142

143143
define i32 @sub(i32 %arg) {
144144
; RECIP-LABEL: 'sub'
145-
; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
145+
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
146146
; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
147147
; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
148148
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
149-
; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
149+
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
150150
; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
151151
; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
152152
; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
153153
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
154-
; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
154+
; RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
155155
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
156156
; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
157157
; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
158158
; RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
159159
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
160-
; RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
160+
; RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
161161
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
162162
; RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
163163
; RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
@@ -167,22 +167,22 @@ define i32 @sub(i32 %arg) {
167167
; RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
168168
;
169169
; SIZE-LABEL: 'sub'
170-
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
170+
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
171171
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
172172
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
173173
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
174-
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
174+
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
175175
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
176176
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
177177
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
178178
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
179-
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
179+
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
180180
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
181181
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
182182
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
183183
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
184184
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
185-
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
185+
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
186186
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
187187
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
188188
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)

0 commit comments

Comments
 (0)