Skip to content

Commit b7e5db1

Browse files
RKSimon authored and yuxuanchen1997 committed
[CostModel][X86] Improve add/sub/mul overflow intrinsic costs
Summary: Noticed due to x86 changes in #97463
Test Plan:
Reviewers:
Subscribers:
Tasks:
Tags:
Differential Revision: https://phabricator.intern.facebook.com/D60250555
1 parent b9815c5 commit b7e5db1

File tree

7 files changed

+259
-253
lines changed

7 files changed

+259
-253
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 20 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -4167,9 +4167,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
41674167
{ ISD::SMIN, MVT::i64, { 1, 3, 2, 3 } },
41684168
{ ISD::UMAX, MVT::i64, { 1, 3, 2, 3 } },
41694169
{ ISD::UMIN, MVT::i64, { 1, 3, 2, 3 } },
4170-
{ ISD::SADDO, MVT::i64, { 1 } },
4171-
{ ISD::UADDO, MVT::i64, { 1 } },
4172-
{ ISD::UMULO, MVT::i64, { 2 } }, // mulq + seto
4170+
{ ISD::SADDO, MVT::i64, { 2, 2, 4, 6 } },
4171+
{ ISD::UADDO, MVT::i64, { 2, 2, 4, 6 } },
4172+
{ ISD::SMULO, MVT::i64, { 4, 4, 4, 6 } },
4173+
{ ISD::UMULO, MVT::i64, { 8, 8, 4, 7 } },
41734174
};
41744175
static const CostKindTblEntry X86CostTbl[] = { // 32 or 64-bit targets
41754176
{ ISD::ABS, MVT::i32, { 1, 2, 3, 3 } }, // SUB+XOR+SRA or SUB+CMOV
@@ -4231,15 +4232,18 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
42314232
{ ISD::UMIN, MVT::i32, { 1, 2, 2, 3 } },
42324233
{ ISD::UMIN, MVT::i16, { 1, 4, 2, 4 } },
42334234
{ ISD::UMIN, MVT::i8, { 1, 4, 2, 4 } },
4234-
{ ISD::SADDO, MVT::i32, { 1 } },
4235-
{ ISD::SADDO, MVT::i16, { 1 } },
4236-
{ ISD::SADDO, MVT::i8, { 1 } },
4237-
{ ISD::UADDO, MVT::i32, { 1 } },
4238-
{ ISD::UADDO, MVT::i16, { 1 } },
4239-
{ ISD::UADDO, MVT::i8, { 1 } },
4240-
{ ISD::UMULO, MVT::i32, { 2 } }, // mul + seto
4241-
{ ISD::UMULO, MVT::i16, { 2 } },
4242-
{ ISD::UMULO, MVT::i8, { 2 } },
4235+
{ ISD::SADDO, MVT::i32, { 2, 2, 4, 6 } },
4236+
{ ISD::SADDO, MVT::i16, { 2, 2, 4, 6 } },
4237+
{ ISD::SADDO, MVT::i8, { 2, 2, 4, 6 } },
4238+
{ ISD::UADDO, MVT::i32, { 2, 2, 4, 6 } },
4239+
{ ISD::UADDO, MVT::i16, { 2, 2, 4, 6 } },
4240+
{ ISD::UADDO, MVT::i8, { 2, 2, 4, 6 } },
4241+
{ ISD::SMULO, MVT::i32, { 2, 2, 4, 6 } },
4242+
{ ISD::SMULO, MVT::i16, { 5, 5, 4, 6 } },
4243+
{ ISD::SMULO, MVT::i8, { 6, 6, 4, 6 } },
4244+
{ ISD::UMULO, MVT::i32, { 6, 6, 4, 8 } },
4245+
{ ISD::UMULO, MVT::i16, { 6, 6, 4, 9 } },
4246+
{ ISD::UMULO, MVT::i8, { 6, 6, 4, 6 } },
42434247
};
42444248

42454249
Type *RetTy = ICA.getReturnType();
@@ -4352,9 +4356,11 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
43524356
ISD = ISD::UADDO;
43534357
OpTy = RetTy->getContainedType(0);
43544358
break;
4355-
case Intrinsic::umul_with_overflow:
43564359
case Intrinsic::smul_with_overflow:
4357-
// SMULO has same costs so don't duplicate.
4360+
ISD = ISD::SMULO;
4361+
OpTy = RetTy->getContainedType(0);
4362+
break;
4363+
case Intrinsic::umul_with_overflow:
43584364
ISD = ISD::UMULO;
43594365
OpTy = RetTy->getContainedType(0);
43604366
break;

llvm/test/Analysis/CostModel/X86/arith-overflow.ll

Lines changed: 230 additions & 230 deletions
Large diffs are not rendered by default.

llvm/test/Analysis/CostModel/X86/costmodel.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ define i64 @foo(i64 %arg) {
2929
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I2P = inttoptr i64 undef to ptr
3030
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %P2I = ptrtoint ptr undef to i64
3131
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %TC = trunc i64 undef to i32
32-
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
32+
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
3333
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void undef()
3434
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 undef
3535
;

llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -50,22 +50,22 @@ declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1)
5050

5151
define void @umul(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
5252
; THRU-LABEL: 'umul'
53-
; THRU-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
53+
; THRU-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
5454
; THRU-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
5555
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
5656
;
5757
; LATE-LABEL: 'umul'
58-
; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
58+
; LATE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
5959
; LATE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
6060
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
6161
;
6262
; SIZE-LABEL: 'umul'
63-
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
63+
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
6464
; SIZE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
6565
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
6666
;
6767
; SIZE_LATE-LABEL: 'umul'
68-
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
68+
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
6969
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
7070
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
7171
;

llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -188,7 +188,7 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr
188188
; CHECK-SAME: ptr [[ARG:%.*]], i64 [[ARG1:%.*]], ptr [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] {
189189
; CHECK-NEXT: [[ENTRY:.*]]:
190190
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[ARG1]], 1
191-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 30
191+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 54
192192
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
193193
; CHECK: [[VECTOR_SCEVCHECK]]:
194194
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ARG2]], i64 8
@@ -379,7 +379,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
379379
; CHECK-NEXT: [[ENTRY:.*]]:
380380
; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[N]], 3
381381
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
382-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 28
382+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 52
383383
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
384384
; CHECK: [[VECTOR_SCEVCHECK]]:
385385
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[N]], 3

llvm/test/Transforms/LoopVectorize/X86/pr35432.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ define i32 @main(ptr %ptr) {
3838
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1
3939
; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP2]])
4040
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], [[UMIN1]]
41-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP4]], 32
41+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP4]], 40
4242
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
4343
; CHECK: vector.scevcheck:
4444
; CHECK-NEXT: [[TMP5:%.*]] = add i8 [[CONV3]], -1

llvm/test/Transforms/LoopVectorize/X86/pr54634.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo
1919
; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(10) [[TMP5]], i64 0, i32 1
2020
; CHECK-NEXT: [[DOTUNPACK2:%.*]] = load i64, ptr addrspace(10) [[DOTELT1]], align 8, !tbaa [[TBAA8]]
2121
; CHECK-NEXT: [[TMP8:%.*]] = add nsw i64 [[TMP2]], 1
22-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP8]], 28
22+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP8]], 60
2323
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
2424
; CHECK: vector.scevcheck:
2525
; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP2]])

0 commit comments

Comments
 (0)