Skip to content

[SystemZ] i128 cost model #78528

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 74 additions & 33 deletions llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ InstructionCost SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
// here, so that constant hoisting will ignore this constant.
if (BitSize == 0)
return TTI::TCC_Free;
// No cost model for operations on integers larger than 64 bit implemented yet.
if (BitSize > 64)
// No cost model implemented yet for operations on integers larger than 128
// bits, or larger than 64 bits when the vector facility is unavailable.
if ((!ST->hasVector() && BitSize > 64) || BitSize > 128)
return TTI::TCC_Free;

if (Imm == 0)
Expand All @@ -96,7 +96,8 @@ InstructionCost SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
return 2 * TTI::TCC_Basic;
}

return 4 * TTI::TCC_Basic;
// i128 immediates are loaded from the constant pool.
return 2 * TTI::TCC_Basic;
}

InstructionCost SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
Expand Down Expand Up @@ -479,21 +480,27 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
return LIBCALL_COST;

// Give discount for some combined logical operations if supported.
if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) {
if (Args.size() == 2) {
if (Opcode == Instruction::Xor) {
for (const Value *A : Args) {
if (const Instruction *I = dyn_cast<Instruction>(A))
if (I->hasOneUse() &&
(I->getOpcode() == Instruction::And ||
I->getOpcode() == Instruction::Or ||
(I->getOpcode() == Instruction::Or ||
I->getOpcode() == Instruction::And ||
I->getOpcode() == Instruction::Xor))
return 0;
if ((ScalarBits <= 64 && ST->hasMiscellaneousExtensions3()) ||
(isInt128InVR(Ty) &&
(I->getOpcode() == Instruction::Or || ST->hasVectorEnhancements1())))
return 0;
}
}
else if (Opcode == Instruction::Or || Opcode == Instruction::And) {
else if (Opcode == Instruction::And || Opcode == Instruction::Or) {
for (const Value *A : Args) {
if (const Instruction *I = dyn_cast<Instruction>(A))
if (I->hasOneUse() && I->getOpcode() == Instruction::Xor)
if ((I->hasOneUse() && I->getOpcode() == Instruction::Xor) &&
((ScalarBits <= 64 && ST->hasMiscellaneousExtensions3()) ||
(isInt128InVR(Ty) &&
(Opcode == Instruction::And || ST->hasVectorEnhancements1()))))
return 0;
}
}
Expand Down Expand Up @@ -774,29 +781,63 @@ InstructionCost SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
assert (!Dst->isVectorTy());

if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
if (Src->isIntegerTy(128))
return LIBCALL_COST;
if (SrcScalarBits >= 32 ||
(I != nullptr && isa<LoadInst>(I->getOperand(0))))
return 1;
return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/;
}

if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
Src->isIntegerTy(1)) {
if (ST->hasLoadStoreOnCond2())
return 2; // li 0; loc 1

// This should be extension of a compare i1 result, which is done with
// ipm and a varying sequence of instructions.
unsigned Cost = 0;
if (Opcode == Instruction::SExt)
Cost = (DstScalarBits < 64 ? 3 : 4);
if (Opcode == Instruction::ZExt)
Cost = 3;
Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
// If operands of an fp-type was compared, this costs +1.
Cost++;
return Cost;
if ((Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) &&
Dst->isIntegerTy(128))
return LIBCALL_COST;

if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt)) {
if (Src->isIntegerTy(1)) {
if (DstScalarBits == 128)
return 5 /*branch seq.*/;

if (ST->hasLoadStoreOnCond2())
return 2; // li 0; loc 1

// This should be extension of a compare i1 result, which is done with
// ipm and a varying sequence of instructions.
unsigned Cost = 0;
if (Opcode == Instruction::SExt)
Cost = (DstScalarBits < 64 ? 3 : 4);
if (Opcode == Instruction::ZExt)
Cost = 3;
Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
// If operands of an fp-type was compared, this costs +1.
Cost++;
return Cost;
}
else if (isInt128InVR(Dst)) {
// Extensions from GPR to i128 (in VR) typically cost two instructions,
// but a zero-extending load would be just one extra instruction.
if (Opcode == Instruction::ZExt && I != nullptr)
if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
if (Ld->hasOneUse())
return 1;
return 2;
}
}

if (Opcode == Instruction::Trunc && isInt128InVR(Src) && I != nullptr) {
if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
if (Ld->hasOneUse())
return 0; // Will be converted to GPR load.
bool OnlyTruncatingStores = true;
for (const User *U : I->users())
if (!isa<StoreInst>(U)) {
OnlyTruncatingStores = false;
break;
}
if (OnlyTruncatingStores)
return 0;
return 2; // Vector element extraction.
}
}
else if (ST->hasVector()) {
Expand Down Expand Up @@ -930,7 +971,7 @@ InstructionCost SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
// A loaded value compared with 0 with multiple users becomes Load and
// Test. The load is then not foldable, so return 0 cost for the ICmp.
unsigned ScalarBits = ValTy->getScalarSizeInBits();
if (I != nullptr && ScalarBits >= 32)
if (I != nullptr && (ScalarBits == 32 || ScalarBits == 64))
if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
Expand All @@ -943,8 +984,8 @@ InstructionCost SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return Cost;
}
case Instruction::Select:
if (ValTy->isFloatingPointTy())
return 4; // No load on condition for FP - costs a conditional jump.
if (ValTy->isFloatingPointTy() || isInt128InVR(ValTy))
return 4; // No LOC for FP / i128 - costs a conditional jump.
return 1; // Load On Condition / Select Register.
}
}
Expand Down Expand Up @@ -1157,6 +1198,10 @@ InstructionCost SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind);

// FP128 is a legal type but kept in a register pair on older CPUs.
if (Src->isFP128Ty() && !ST->hasVectorEnhancements1())
return 2;

unsigned NumOps =
(Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src));

Expand All @@ -1177,10 +1222,6 @@ InstructionCost SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
}
}

if (Src->getScalarSizeInBits() == 128)
// 128 bit scalars are held in a pair of two 64 bit registers.
NumOps *= 2;

return NumOps;
}

Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {

unsigned const LIBCALL_COST = 30;

/// Returns true if \p Ty is the 128-bit integer type and the subtarget has
/// the vector facility. The cost model uses this to recognize i128 values
/// that are handled in a vector register (VR) rather than in GPRs.
bool isInt128InVR(Type *Ty) {
  if (!Ty->isIntegerTy(128))
    return false;
  return ST->hasVector();
}

public:
explicit SystemZTTIImpl(const SystemZTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
Expand Down
143 changes: 143 additions & 0 deletions llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
;

; Sign extension of an i1 compare result to i128 is costed as a branch
; sequence (5).
define i128 @fun1(i128 %val1, i128 %val2) {
; CHECK-LABEL: 'fun1'
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp eq i128 %val1, %val2
; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v128 = sext i1 %cmp to i128
%cmp = icmp eq i128 %val1, %val2
%v128 = sext i1 %cmp to i128
ret i128 %v128
}

; Zero extension of an i1 compare result to i128 is likewise costed as a
; branch sequence (5).
define i128 @fun2(i128 %val1, i128 %val2) {
; CHECK-LABEL: 'fun2'
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp eq i128 %val1, %val2
; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v128 = zext i1 %cmp to i128
%cmp = icmp eq i128 %val1, %val2
%v128 = zext i1 %cmp to i128
ret i128 %v128
}

; There is no load-on-condition for i128, so a select of i128 values costs a
; conditional jump (4).
define i128 @fun3(i128 %val1, i128 %val2,
i128 %val3, i128 %val4) {
; CHECK-LABEL: 'fun3'
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp eq i128 %val1, %val2
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %add = add i128 %val3, %val4
; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %sel = select i1 %cmp, i128 %val3, i128 %add
%cmp = icmp eq i128 %val1, %val2
%add = add i128 %val3, %val4
%sel = select i1 %cmp, i128 %val3, i128 %add
ret i128 %sel
}

; Sign extension of a loaded i64 to i128 costs 2: only zero-extending loads
; get the single-instruction discount.
define i128 @fun4(ptr %src) {
; CHECK-LABEL: 'fun4'
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res = sext i64 %v to i128
%v = load i64, ptr %src, align 8
%res = sext i64 %v to i128
ret i128 %res
}

; Sign extension of a computed (non-load) i64 value to i128 costs 2.
define i128 @fun5(i64 %lhs, i64 %rhs) {
; CHECK-LABEL: 'fun5'
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res = sext i64 %v to i128
%v = add i64 %lhs, %rhs
%res = sext i64 %v to i128
ret i128 %res
}

; Zero extension of a single-use i64 load to i128 costs just one extra
; instruction.
define i128 @fun6(ptr %src) {
; CHECK-LABEL: 'fun6'
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res = zext i64 %v to i128
%v = load i64, ptr %src, align 8
%res = zext i64 %v to i128
ret i128 %res
}

; Zero extension of a computed (non-load) i64 value to i128 costs 2.
define i128 @fun7(i64 %lhs, i64 %rhs) {
; CHECK-LABEL: 'fun7'
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res = zext i64 %v to i128
%v = add i64 %lhs, %rhs
%res = zext i64 %v to i128
ret i128 %res
}

; A truncation of i128 whose only users are stores is free (truncating
; store).
define void @fun8(i128 %lhs, i128 %rhs, ptr %dst) {
; CHECK-LABEL: 'fun8'
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %t = trunc i128 %v to i64
%v = add i128 %lhs, %rhs
%t = trunc i128 %v to i64
store i64 %t, ptr %dst, align 8
ret void
}

; If the truncation also has a non-store user (here the ret), a vector
; element extraction is needed (cost 2).
define i64 @fun9(i128 %lhs, i128 %rhs, ptr %dst) {
; CHECK-LABEL: 'fun9'
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %t = trunc i128 %v to i64
%v = add i128 %lhs, %rhs
%t = trunc i128 %v to i64
store i64 %t, ptr %dst, align 8
ret i64 %t
}

; Truncation of a single-use i128 load is free: it will be converted to a
; GPR load.
define i64 @fun10(ptr %src) {
; CHECK-LABEL: 'fun10'
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %t = trunc i128 %v to i64
%v = load i128, ptr %src, align 8
%t = trunc i128 %v to i64
ret i64 %t
}

; If the load has another user (here the add), the truncation becomes an
; extraction (cost 2).
define i64 @fun11(ptr %src, i128 %val2, ptr %dst) {
; CHECK-LABEL: 'fun11'
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %t = trunc i128 %v to i64
%v = load i128, ptr %src, align 8
%t = trunc i128 %v to i64
%a = add i128 %v, %val2
store i128 %a, ptr %dst
ret i64 %t
}

; Truncation with a GPR use typically requires an extraction (cost 2).
define i64 @fun12(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: 'fun12'
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %t = trunc i128 %v to i64
%v = add i128 %lhs, %rhs
%t = trunc i128 %v to i64
ret i64 %t
}

; Conversions between floating-point types and i128, in either direction and
; for both signed and unsigned variants, require libcalls (cost 30).
define void @fun13() {
; CHECK-LABEL: 'fun13'
; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v0 = fptosi fp128 undef to i128
; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v1 = fptosi double undef to i128
; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v2 = fptosi float undef to i128
; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v3 = fptoui fp128 undef to i128
; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v4 = fptoui double undef to i128
; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v5 = fptoui float undef to i128
; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v6 = sitofp i128 undef to fp128
; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v7 = sitofp i128 undef to double
; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v8 = sitofp i128 undef to float
; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v9 = uitofp i128 undef to fp128
; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v10 = uitofp i128 undef to double
; CHECK: Cost Model: Found an estimated cost of 30 for instruction: %v11 = uitofp i128 undef to float
%v0 = fptosi fp128 undef to i128
%v1 = fptosi double undef to i128
%v2 = fptosi float undef to i128
%v3 = fptoui fp128 undef to i128
%v4 = fptoui double undef to i128
%v5 = fptoui float undef to i128
%v6 = sitofp i128 undef to fp128
%v7 = sitofp i128 undef to double
%v8 = sitofp i128 undef to float
%v9 = uitofp i128 undef to fp128
%v10 = uitofp i128 undef to double
%v11 = uitofp i128 undef to float
ret void
}
6 changes: 6 additions & 0 deletions llvm/test/Analysis/CostModel/SystemZ/int-arith.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ define void @add() {
%res1 = add i16 undef, undef
%res2 = add i32 undef, undef
%res3 = add i64 undef, undef
%resQ = add i128 undef, undef
%res4 = add <2 x i8> undef, undef
%res5 = add <2 x i16> undef, undef
%res6 = add <2 x i32> undef, undef
Expand All @@ -29,6 +30,7 @@ define void @add() {
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = add i16 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = add i32 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = add i64 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %resQ = add i128 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = add <2 x i8> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = add <2 x i16> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = add <2 x i32> undef, undef
Expand All @@ -54,6 +56,7 @@ define void @sub() {
%res1 = sub i16 undef, undef
%res2 = sub i32 undef, undef
%res3 = sub i64 undef, undef
%resQ = sub i128 undef, undef
%res4 = sub <2 x i8> undef, undef
%res5 = sub <2 x i16> undef, undef
%res6 = sub <2 x i32> undef, undef
Expand All @@ -75,6 +78,7 @@ define void @sub() {
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = sub i16 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = sub i32 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = sub i64 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %resQ = sub i128 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = sub <2 x i8> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = sub <2 x i16> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = sub <2 x i32> undef, undef
Expand All @@ -100,6 +104,7 @@ define void @mul() {
%res1 = mul i16 undef, undef
%res2 = mul i32 undef, undef
%res3 = mul i64 undef, undef
%resQ = mul i128 undef, undef
%res4 = mul <2 x i8> undef, undef
%res5 = mul <2 x i16> undef, undef
%res6 = mul <2 x i32> undef, undef
Expand All @@ -121,6 +126,7 @@ define void @mul() {
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res1 = mul i16 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res2 = mul i32 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res3 = mul i64 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %resQ = mul i128 undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <2 x i8> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = mul <2 x i16> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = mul <2 x i32> undef, undef
Expand Down
Loading