Skip to content

Commit 48dfe67

Browse files
committed
[RISCV][CostModel] Add cost for fabs/fsqrt of type bf16/f16
1 parent 8652608 commit 48dfe67

File tree

3 files changed

+94
-50
lines changed

3 files changed

+94
-50
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,21 +1035,41 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
10351035
}
10361036
break;
10371037
}
1038-
case Intrinsic::fabs:
1038+
case Intrinsic::fabs: {
1039+
auto LT = getTypeLegalizationCost(RetTy);
1040+
if (ST->hasVInstructions() && LT.second.isVector()) {
1041+
// lui a0, 8
1042+
// addi a0, a0, -1
1043+
// vsetvli a1, zero, e16, m1, ta, ma
1044+
// vand.vx v8, v8, a0
1045+
// f16 with zvfhmin and bf16 with zvfhbmin
1046+
if (LT.second.getVectorElementType() == MVT::bf16 ||
1047+
(LT.second.getVectorElementType() == MVT::f16 &&
1048+
!ST->hasVInstructionsF16()))
1049+
return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
1050+
CostKind) +
1051+
2;
1052+
else
1053+
return LT.first *
1054+
getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second, CostKind);
1055+
}
1056+
break;
1057+
}
10391058
case Intrinsic::sqrt: {
10401059
auto LT = getTypeLegalizationCost(RetTy);
1041-
// TODO: add f16/bf16, bf16 with zvfbfmin && f16 with zvfhmin
10421060
if (ST->hasVInstructions() && LT.second.isVector()) {
1043-
unsigned Op;
1044-
switch (ICA.getID()) {
1045-
case Intrinsic::fabs:
1046-
Op = RISCV::VFSGNJX_VV;
1047-
break;
1048-
case Intrinsic::sqrt:
1049-
Op = RISCV::VFSQRT_V;
1050-
break;
1051-
}
1052-
return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1061+
SmallVector<unsigned, 3> Opcodes;
1062+
// f16 with zvfhmin and bf16 with zvfbfmin
1063+
if (LT.second.getVectorElementType() == MVT::bf16 &&
1064+
ST->hasVInstructionsBF16Minimal())
1065+
Opcodes = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFSQRT_V,
1066+
RISCV::VFNCVTBF16_F_F_W};
1067+
else if (LT.second.getVectorElementType() == MVT::f16 &&
1068+
!ST->hasVInstructionsF16())
1069+
Opcodes = {RISCV::VFWCVT_F_F_V, RISCV::VFSQRT_V, RISCV::VFNCVT_F_F_W};
1070+
else
1071+
Opcodes = {RISCV::VFSQRT_V};
1072+
return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
10531073
}
10541074
break;
10551075
}

llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@
55
define void @fabs() {
66
; CHECK-LABEL: 'fabs'
77
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.fabs.bf16(bfloat undef)
8-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> undef)
9-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.fabs.v4bf16(<4 x bfloat> undef)
10-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> undef)
11-
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x bfloat> @llvm.fabs.v16bf16(<16 x bfloat> undef)
12-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.fabs.nxv2bf16(<vscale x 2 x bfloat> undef)
13-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.fabs.nxv4bf16(<vscale x 4 x bfloat> undef)
14-
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.fabs.nxv8bf16(<vscale x 8 x bfloat> undef)
15-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.fabs.nxv16bf16(<vscale x 16 x bfloat> undef)
8+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> undef)
9+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = call <4 x bfloat> @llvm.fabs.v4bf16(<4 x bfloat> undef)
10+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> undef)
11+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = call <16 x bfloat> @llvm.fabs.v16bf16(<16 x bfloat> undef)
12+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.fabs.nxv2bf16(<vscale x 2 x bfloat> undef)
13+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.fabs.nxv4bf16(<vscale x 4 x bfloat> undef)
14+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.fabs.nxv8bf16(<vscale x 8 x bfloat> undef)
15+
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.fabs.nxv16bf16(<vscale x 16 x bfloat> undef)
1616
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call float @llvm.fabs.f32(float undef)
1717
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
1818
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
@@ -66,17 +66,29 @@ define void @fabs() {
6666
}
6767

6868
define void @fabs_f16() {
69-
; CHECK-LABEL: 'fabs_f16'
70-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fabs.f16(half undef)
71-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
72-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
73-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
74-
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
75-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> undef)
76-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> undef)
77-
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> undef)
78-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> undef)
79-
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
69+
; ZVFH-LABEL: 'fabs_f16'
70+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fabs.f16(half undef)
71+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
72+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
73+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
74+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
75+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> undef)
76+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> undef)
77+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> undef)
78+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> undef)
79+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
80+
;
81+
; ZVFHMIN-LABEL: 'fabs_f16'
82+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fabs.f16(half undef)
83+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
84+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
85+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
86+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
87+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> undef)
88+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> undef)
89+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %8 = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> undef)
90+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %9 = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> undef)
91+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
8092
;
8193
call half @llvm.fabs.f16(half undef)
8294
call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)

llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@
55
define void @sqrt() {
66
; CHECK-LABEL: 'sqrt'
77
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.sqrt.bf16(bfloat undef)
8-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.sqrt.v2bf16(<2 x bfloat> undef)
9-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.sqrt.v4bf16(<4 x bfloat> undef)
10-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> undef)
11-
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> undef)
12-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.sqrt.nxv2bf16(<vscale x 2 x bfloat> undef)
13-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.sqrt.nxv4bf16(<vscale x 4 x bfloat> undef)
14-
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.sqrt.nxv8bf16(<vscale x 8 x bfloat> undef)
15-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.sqrt.nxv16bf16(<vscale x 16 x bfloat> undef)
8+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call <2 x bfloat> @llvm.sqrt.v2bf16(<2 x bfloat> undef)
9+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = call <4 x bfloat> @llvm.sqrt.v4bf16(<4 x bfloat> undef)
10+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> undef)
11+
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %5 = call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> undef)
12+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.sqrt.nxv2bf16(<vscale x 2 x bfloat> undef)
13+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.sqrt.nxv4bf16(<vscale x 4 x bfloat> undef)
14+
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.sqrt.nxv8bf16(<vscale x 8 x bfloat> undef)
15+
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.sqrt.nxv16bf16(<vscale x 16 x bfloat> undef)
1616
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call float @llvm.sqrt.f32(float undef)
1717
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> undef)
1818
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
@@ -66,17 +66,29 @@ define void @sqrt() {
6666
}
6767

6868
define void @sqrt_f16() {
69-
; CHECK-LABEL: 'sqrt_f16'
70-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.sqrt.f16(half undef)
71-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.sqrt.v2f16(<2 x half> undef)
72-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
73-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
74-
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
75-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> undef)
76-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> undef)
77-
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> undef)
78-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half> undef)
79-
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
69+
; ZVFH-LABEL: 'sqrt_f16'
70+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.sqrt.f16(half undef)
71+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.sqrt.v2f16(<2 x half> undef)
72+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
73+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
74+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
75+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> undef)
76+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> undef)
77+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> undef)
78+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half> undef)
79+
; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
80+
;
81+
; ZVFHMIN-LABEL: 'sqrt_f16'
82+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.sqrt.f16(half undef)
83+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = call <2 x half> @llvm.sqrt.v2f16(<2 x half> undef)
84+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %3 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
85+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %4 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
86+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %5 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
87+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> undef)
88+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> undef)
89+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %8 = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> undef)
90+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %9 = call <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half> undef)
91+
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
8092
;
8193
call half @llvm.sqrt.f16(half undef)
8294
call <2 x half> @llvm.sqrt.v2f16(<2 x half> undef)

0 commit comments

Comments
 (0)