Skip to content

Commit ad81252

Browse files
committed
TTI: Check legalization cost of add/sub overflow ISD nodes
1 parent d89fa11 commit ad81252

File tree

5 files changed

+386
-372
lines changed

5 files changed

+386
-372
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 52 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -2180,44 +2180,17 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
21802180
return Cost;
21812181
}
21822182
case Intrinsic::sadd_with_overflow:
2183-
case Intrinsic::ssub_with_overflow: {
2184-
Type *SumTy = RetTy->getContainedType(0);
2185-
Type *OverflowTy = RetTy->getContainedType(1);
2186-
unsigned Opcode = IID == Intrinsic::sadd_with_overflow
2187-
? BinaryOperator::Add
2188-
: BinaryOperator::Sub;
2189-
2190-
// Add:
2191-
// Overflow -> (Result < LHS) ^ (RHS < 0)
2192-
// Sub:
2193-
// Overflow -> (Result < LHS) ^ (RHS > 0)
2194-
InstructionCost Cost = 0;
2195-
Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
2196-
Cost += 2 * thisT()->getCmpSelInstrCost(
2197-
Instruction::ICmp, SumTy, OverflowTy,
2198-
CmpInst::ICMP_SGT, CostKind);
2199-
Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
2200-
CostKind);
2201-
return Cost;
2202-
}
2183+
ISD = ISD::SADDO;
2184+
break;
2185+
case Intrinsic::ssub_with_overflow:
2186+
ISD = ISD::SSUBO;
2187+
break;
22032188
case Intrinsic::uadd_with_overflow:
2204-
case Intrinsic::usub_with_overflow: {
2205-
Type *SumTy = RetTy->getContainedType(0);
2206-
Type *OverflowTy = RetTy->getContainedType(1);
2207-
unsigned Opcode = IID == Intrinsic::uadd_with_overflow
2208-
? BinaryOperator::Add
2209-
: BinaryOperator::Sub;
2210-
CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow
2211-
? CmpInst::ICMP_ULT
2212-
: CmpInst::ICMP_UGT;
2213-
2214-
InstructionCost Cost = 0;
2215-
Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
2216-
Cost +=
2217-
thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
2218-
Pred, CostKind);
2219-
return Cost;
2220-
}
2189+
ISD = ISD::UADDO;
2190+
break;
2191+
case Intrinsic::usub_with_overflow:
2192+
ISD = ISD::USUBO;
2193+
break;
22212194
case Intrinsic::smul_with_overflow:
22222195
case Intrinsic::umul_with_overflow: {
22232196
Type *MulTy = RetTy->getContainedType(0);
@@ -2296,8 +2269,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
22962269
break;
22972270
}
22982271

2272+
auto *ST = dyn_cast<StructType>(RetTy);
2273+
Type *LegalizeTy = ST ? ST->getContainedType(0) : RetTy;
2274+
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(LegalizeTy);
2275+
22992276
const TargetLoweringBase *TLI = getTLI();
2300-
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
23012277

23022278
if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
23032279
if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
@@ -2353,6 +2329,44 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
23532329
Pred, CostKind);
23542330
return Cost;
23552331
}
2332+
case Intrinsic::sadd_with_overflow:
2333+
case Intrinsic::ssub_with_overflow: {
2334+
Type *SumTy = RetTy->getContainedType(0);
2335+
Type *OverflowTy = RetTy->getContainedType(1);
2336+
unsigned Opcode = IID == Intrinsic::sadd_with_overflow
2337+
? BinaryOperator::Add
2338+
: BinaryOperator::Sub;
2339+
2340+
// Add:
2341+
// Overflow -> (Result < LHS) ^ (RHS < 0)
2342+
// Sub:
2343+
// Overflow -> (Result < LHS) ^ (RHS > 0)
2344+
InstructionCost Cost = 0;
2345+
Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
2346+
Cost +=
2347+
2 * thisT()->getCmpSelInstrCost(Instruction::ICmp, SumTy, OverflowTy,
2348+
CmpInst::ICMP_SGT, CostKind);
2349+
Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
2350+
CostKind);
2351+
return Cost;
2352+
}
2353+
case Intrinsic::uadd_with_overflow:
2354+
case Intrinsic::usub_with_overflow: {
2355+
Type *SumTy = RetTy->getContainedType(0);
2356+
Type *OverflowTy = RetTy->getContainedType(1);
2357+
unsigned Opcode = IID == Intrinsic::uadd_with_overflow
2358+
? BinaryOperator::Add
2359+
: BinaryOperator::Sub;
2360+
CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow
2361+
? CmpInst::ICMP_ULT
2362+
: CmpInst::ICMP_UGT;
2363+
2364+
InstructionCost Cost = 0;
2365+
Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
2366+
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
2367+
OverflowTy, Pred, CostKind);
2368+
return Cost;
2369+
}
23562370
case Intrinsic::sadd_sat:
23572371
case Intrinsic::ssub_sat: {
23582372
// Assume a default expansion.

llvm/test/Analysis/CostModel/ARM/active_lane_mask.ll

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -3,18 +3,18 @@
33

44
define void @get_lane_mask() {
55
; CHECK-LABEL: 'get_lane_mask'
6-
; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)
7-
; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef)
8-
; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef)
9-
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef)
10-
; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef)
11-
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
12-
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
13-
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
14-
; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
15-
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %mask_v8i1_i16 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i16(i16 undef, i16 undef)
16-
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %mask_v4i1_i16 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i16(i16 undef, i16 undef)
17-
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %mask_v2i1_i16 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i16(i16 undef, i16 undef)
6+
; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)
7+
; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef)
8+
; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef)
9+
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef)
10+
; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef)
11+
; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
12+
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
13+
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
14+
; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
15+
; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_v8i1_i16 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i16(i16 undef, i16 undef)
16+
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %mask_v4i1_i16 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i16(i16 undef, i16 undef)
17+
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %mask_v2i1_i16 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i16(i16 undef, i16 undef)
1818
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
1919
;
2020
%mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)

0 commit comments

Comments
 (0)