Skip to content

Commit 251dd7c

Browse files
committed
[x86] add cost overrides for mul with overflow
I'm assuming the standard size integer instructions for this end up as something like:

    mulq %rsi
    seto %al

And the 'mul' generally has a reciprocal throughput of 1 on typical implementations (higher latency, but that's not handled here). The default costs may end up much higher than that, and that's what we see in the test diffs.

Vector types are left as a 'TODO'.

Differential Revision: https://reviews.llvm.org/D90431
1 parent 7156910 commit 251dd7c

File tree

3 files changed

+94
-81
lines changed

3 files changed

+94
-81
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2288,6 +2288,9 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
22882288
// CTLZ: llvm\test\CodeGen\X86\vector-lzcnt-*.ll
22892289
// CTPOP: llvm\test\CodeGen\X86\vector-popcnt-*.ll
22902290
// CTTZ: llvm\test\CodeGen\X86\vector-tzcnt-*.ll
2291+
2292+
// TODO: Overflow intrinsics (*ADDO, *SUBO, *MULO) with vector types are not
2293+
// specialized in these tables yet.
22912294
static const CostTblEntry AVX512CDCostTbl[] = {
22922295
{ ISD::CTLZ, MVT::v8i64, 1 },
22932296
{ ISD::CTLZ, MVT::v16i32, 1 },
@@ -2669,6 +2672,7 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
26692672
{ ISD::CTPOP, MVT::i64, 10 },
26702673
{ ISD::SADDO, MVT::i64, 1 },
26712674
{ ISD::UADDO, MVT::i64, 1 },
2675+
{ ISD::UMULO, MVT::i64, 2 }, // mulq + seto
26722676
};
26732677
static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
26742678
{ ISD::BITREVERSE, MVT::i32, 14 },
@@ -2689,6 +2693,9 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
26892693
{ ISD::UADDO, MVT::i32, 1 },
26902694
{ ISD::UADDO, MVT::i16, 1 },
26912695
{ ISD::UADDO, MVT::i8, 1 },
2696+
{ ISD::UMULO, MVT::i32, 2 }, // mul + seto
2697+
{ ISD::UMULO, MVT::i16, 2 },
2698+
{ ISD::UMULO, MVT::i8, 2 },
26922699
};
26932700

26942701
Type *RetTy = ICA.getReturnType();
@@ -2760,6 +2767,12 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
27602767
ISD = ISD::UADDO;
27612768
OpTy = RetTy->getContainedType(0);
27622769
break;
2770+
case Intrinsic::umul_with_overflow:
2771+
case Intrinsic::smul_with_overflow:
2772+
// SMULO has same costs so don't duplicate.
2773+
ISD = ISD::UMULO;
2774+
OpTy = RetTy->getContainedType(0);
2775+
break;
27632776
}
27642777

27652778
if (ISD != ISD::DELETED_NODE) {

0 commit comments

Comments
 (0)