Skip to content

Commit fcebc6c

Browse files
committed
TTI: Check legalization cost of add/sub overflow ISD nodes
1 parent 8cae9dc commit fcebc6c

File tree

5 files changed: 358 additions and 344 deletions.

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 52 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -2184,44 +2184,17 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
21842184
return Cost;
21852185
}
21862186
case Intrinsic::sadd_with_overflow:
2187-
case Intrinsic::ssub_with_overflow: {
2188-
Type *SumTy = RetTy->getContainedType(0);
2189-
Type *OverflowTy = RetTy->getContainedType(1);
2190-
unsigned Opcode = IID == Intrinsic::sadd_with_overflow
2191-
? BinaryOperator::Add
2192-
: BinaryOperator::Sub;
2193-
2194-
// Add:
2195-
// Overflow -> (Result < LHS) ^ (RHS < 0)
2196-
// Sub:
2197-
// Overflow -> (Result < LHS) ^ (RHS > 0)
2198-
InstructionCost Cost = 0;
2199-
Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
2200-
Cost += 2 * thisT()->getCmpSelInstrCost(
2201-
Instruction::ICmp, SumTy, OverflowTy,
2202-
CmpInst::ICMP_SGT, CostKind);
2203-
Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
2204-
CostKind);
2205-
return Cost;
2206-
}
2187+
ISD = ISD::SADDO;
2188+
break;
2189+
case Intrinsic::ssub_with_overflow:
2190+
ISD = ISD::SSUBO;
2191+
break;
22072192
case Intrinsic::uadd_with_overflow:
2208-
case Intrinsic::usub_with_overflow: {
2209-
Type *SumTy = RetTy->getContainedType(0);
2210-
Type *OverflowTy = RetTy->getContainedType(1);
2211-
unsigned Opcode = IID == Intrinsic::uadd_with_overflow
2212-
? BinaryOperator::Add
2213-
: BinaryOperator::Sub;
2214-
CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow
2215-
? CmpInst::ICMP_ULT
2216-
: CmpInst::ICMP_UGT;
2217-
2218-
InstructionCost Cost = 0;
2219-
Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
2220-
Cost +=
2221-
thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
2222-
Pred, CostKind);
2223-
return Cost;
2224-
}
2193+
ISD = ISD::UADDO;
2194+
break;
2195+
case Intrinsic::usub_with_overflow:
2196+
ISD = ISD::USUBO;
2197+
break;
22252198
case Intrinsic::smul_with_overflow:
22262199
case Intrinsic::umul_with_overflow: {
22272200
Type *MulTy = RetTy->getContainedType(0);
@@ -2300,8 +2273,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
23002273
break;
23012274
}
23022275

2276+
auto *ST = dyn_cast<StructType>(RetTy);
2277+
Type *LegalizeTy = ST ? ST->getContainedType(0) : RetTy;
2278+
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(LegalizeTy);
2279+
23032280
const TargetLoweringBase *TLI = getTLI();
2304-
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
23052281

23062282
if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
23072283
if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
@@ -2357,6 +2333,44 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
23572333
Pred, CostKind);
23582334
return Cost;
23592335
}
2336+
case Intrinsic::sadd_with_overflow:
2337+
case Intrinsic::ssub_with_overflow: {
2338+
Type *SumTy = RetTy->getContainedType(0);
2339+
Type *OverflowTy = RetTy->getContainedType(1);
2340+
unsigned Opcode = IID == Intrinsic::sadd_with_overflow
2341+
? BinaryOperator::Add
2342+
: BinaryOperator::Sub;
2343+
2344+
// Add:
2345+
// Overflow -> (Result < LHS) ^ (RHS < 0)
2346+
// Sub:
2347+
// Overflow -> (Result < LHS) ^ (RHS > 0)
2348+
InstructionCost Cost = 0;
2349+
Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
2350+
Cost +=
2351+
2 * thisT()->getCmpSelInstrCost(Instruction::ICmp, SumTy, OverflowTy,
2352+
CmpInst::ICMP_SGT, CostKind);
2353+
Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
2354+
CostKind);
2355+
return Cost;
2356+
}
2357+
case Intrinsic::uadd_with_overflow:
2358+
case Intrinsic::usub_with_overflow: {
2359+
Type *SumTy = RetTy->getContainedType(0);
2360+
Type *OverflowTy = RetTy->getContainedType(1);
2361+
unsigned Opcode = IID == Intrinsic::uadd_with_overflow
2362+
? BinaryOperator::Add
2363+
: BinaryOperator::Sub;
2364+
CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow
2365+
? CmpInst::ICMP_ULT
2366+
: CmpInst::ICMP_UGT;
2367+
2368+
InstructionCost Cost = 0;
2369+
Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
2370+
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
2371+
OverflowTy, Pred, CostKind);
2372+
return Cost;
2373+
}
23602374
case Intrinsic::sadd_sat:
23612375
case Intrinsic::ssub_sat: {
23622376
// Assume a default expansion.

llvm/test/Analysis/CostModel/ARM/active_lane_mask.ll

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -3,18 +3,18 @@
33

44
define void @get_lane_mask() {
55
; CHECK-LABEL: 'get_lane_mask'
6-
; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)
7-
; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef)
8-
; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef)
9-
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef)
10-
; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef)
11-
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
12-
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
13-
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
14-
; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
15-
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %mask_v8i1_i16 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i16(i16 undef, i16 undef)
16-
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %mask_v4i1_i16 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i16(i16 undef, i16 undef)
17-
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %mask_v2i1_i16 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i16(i16 undef, i16 undef)
6+
; CHECK-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)
7+
; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef)
8+
; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef)
9+
; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef)
10+
; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef)
11+
; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
12+
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
13+
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
14+
; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
15+
; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_v8i1_i16 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i16(i16 undef, i16 undef)
16+
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %mask_v4i1_i16 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i16(i16 undef, i16 undef)
17+
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %mask_v2i1_i16 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i16(i16 undef, i16 undef)
1818
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
1919
;
2020
%mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)

0 commit comments

Comments
 (0)