[RISCV][CostModel] Add cost for fabs/fsqrt of type bf16/f16 #118608

LiqinWeng · 2024-12-04T09:25:52Z

No description provided.

llvmbot · 2024-12-04T09:26:27Z

@llvm/pr-subscribers-llvm-analysis

Author: LiqinWeng (LiqinWeng)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/118608.diff

3 Files Affected:

(modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+32-12)
(modified) llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll (+31-19)
(modified) llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll (+31-19)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index aed476db1956fa..38e9dfe1e44e96 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1035,21 +1035,41 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     }
     break;
   }
-  case Intrinsic::fabs:
+  case Intrinsic::fabs: {
+    auto LT = getTypeLegalizationCost(RetTy);
+    if (ST->hasVInstructions() && LT.second.isVector()) {
+      // lui a0, 8
+      // addi a0, a0, -1
+      // vsetvli a1, zero, e16, m1, ta, ma
+      // vand.vx v8, v8, a0
+      // f16 with zvfhmin and bf16 with zvfhbmin
+      if (LT.second.getVectorElementType() == MVT::bf16 ||
+          (LT.second.getVectorElementType() == MVT::f16 &&
+           !ST->hasVInstructionsF16()))
+        return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
+                                                  CostKind) +
+               2;
+      else
+        return LT.first *
+               getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second, CostKind);
+    }
+    break;
+  }
   case Intrinsic::sqrt: {
     auto LT = getTypeLegalizationCost(RetTy);
-    // TODO: add f16/bf16, bf16 with zvfbfmin && f16 with zvfhmin
     if (ST->hasVInstructions() && LT.second.isVector()) {
-      unsigned Op;
-      switch (ICA.getID()) {
-      case Intrinsic::fabs:
-        Op = RISCV::VFSGNJX_VV;
-        break;
-      case Intrinsic::sqrt:
-        Op = RISCV::VFSQRT_V;
-        break;
-      }
-      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
+      SmallVector<unsigned, 3> Opcodes;
+      // f16 with zvfhmin and bf16 with zvfbfmin
+      if (LT.second.getVectorElementType() == MVT::bf16 &&
+          ST->hasVInstructionsBF16Minimal())
+        Opcodes = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFSQRT_V,
+                   RISCV::VFNCVTBF16_F_F_W};
+      else if (LT.second.getVectorElementType() == MVT::f16 &&
+               !ST->hasVInstructionsF16())
+        Opcodes = {RISCV::VFWCVT_F_F_V, RISCV::VFSQRT_V, RISCV::VFNCVT_F_F_W};
+      else
+        Opcodes = {RISCV::VFSQRT_V};
+      return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
     }
     break;
   }
diff --git a/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll b/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
index 9eb06a07f135fa..8ff70453194381 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
@@ -5,14 +5,14 @@
 define void @fabs() {
 ; CHECK-LABEL: 'fabs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.fabs.bf16(bfloat undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.fabs.v4bf16(<4 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x bfloat> @llvm.fabs.v16bf16(<16 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.fabs.nxv2bf16(<vscale x 2 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.fabs.nxv4bf16(<vscale x 4 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.fabs.nxv8bf16(<vscale x 8 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.fabs.nxv16bf16(<vscale x 16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call <4 x bfloat> @llvm.fabs.v4bf16(<4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = call <16 x bfloat> @llvm.fabs.v16bf16(<16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.fabs.nxv2bf16(<vscale x 2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.fabs.nxv4bf16(<vscale x 4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.fabs.nxv8bf16(<vscale x 8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.fabs.nxv16bf16(<vscale x 16 x bfloat> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call float @llvm.fabs.f32(float undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
@@ -66,17 +66,29 @@ define void @fabs() {
 }
 
 define void @fabs_f16() {
-; CHECK-LABEL: 'fabs_f16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fabs.f16(half undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVFH-LABEL: 'fabs_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fabs.f16(half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fabs_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fabs.f16(half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %8 = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %9 = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call half @llvm.fabs.f16(half undef)
   call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll b/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll
index 446627f6bf3c0e..d3ceb162c13364 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll
@@ -5,14 +5,14 @@
 define void @sqrt() {
 ; CHECK-LABEL: 'sqrt'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.sqrt.bf16(bfloat undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.sqrt.v2bf16(<2 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.sqrt.v4bf16(<4 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.sqrt.nxv2bf16(<vscale x 2 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.sqrt.nxv4bf16(<vscale x 4 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.sqrt.nxv8bf16(<vscale x 8 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.sqrt.nxv16bf16(<vscale x 16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call <2 x bfloat> @llvm.sqrt.v2bf16(<2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call <4 x bfloat> @llvm.sqrt.v4bf16(<4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.sqrt.nxv2bf16(<vscale x 2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.sqrt.nxv4bf16(<vscale x 4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.sqrt.nxv8bf16(<vscale x 8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.sqrt.nxv16bf16(<vscale x 16 x bfloat> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call float @llvm.sqrt.f32(float undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
@@ -66,17 +66,29 @@ define void @sqrt() {
 }
 
 define void @sqrt_f16() {
-; CHECK-LABEL: 'sqrt_f16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.sqrt.f16(half undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.sqrt.v2f16(<2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVFH-LABEL: 'sqrt_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.sqrt.f16(half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.sqrt.v2f16(<2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'sqrt_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.sqrt.f16(half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call <2 x half> @llvm.sqrt.v2f16(<2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %8 = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %9 = call <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call half @llvm.sqrt.f16(half undef)
   call <2 x half> @llvm.sqrt.v2f16(<2 x half> undef)

llvmbot · 2024-12-04T09:26:28Z

@llvm/pr-subscribers-backend-risc-v

Author: LiqinWeng (LiqinWeng)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/118608.diff

3 Files Affected:

(modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+32-12)
(modified) llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll (+31-19)
(modified) llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll (+31-19)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index aed476db1956fa..38e9dfe1e44e96 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1035,21 +1035,41 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     }
     break;
   }
-  case Intrinsic::fabs:
+  case Intrinsic::fabs: {
+    auto LT = getTypeLegalizationCost(RetTy);
+    if (ST->hasVInstructions() && LT.second.isVector()) {
+      // lui a0, 8
+      // addi a0, a0, -1
+      // vsetvli a1, zero, e16, m1, ta, ma
+      // vand.vx v8, v8, a0
+      // f16 with zvfhmin and bf16 with zvfhbmin
+      if (LT.second.getVectorElementType() == MVT::bf16 ||
+          (LT.second.getVectorElementType() == MVT::f16 &&
+           !ST->hasVInstructionsF16()))
+        return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
+                                                  CostKind) +
+               2;
+      else
+        return LT.first *
+               getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second, CostKind);
+    }
+    break;
+  }
   case Intrinsic::sqrt: {
     auto LT = getTypeLegalizationCost(RetTy);
-    // TODO: add f16/bf16, bf16 with zvfbfmin && f16 with zvfhmin
     if (ST->hasVInstructions() && LT.second.isVector()) {
-      unsigned Op;
-      switch (ICA.getID()) {
-      case Intrinsic::fabs:
-        Op = RISCV::VFSGNJX_VV;
-        break;
-      case Intrinsic::sqrt:
-        Op = RISCV::VFSQRT_V;
-        break;
-      }
-      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
+      SmallVector<unsigned, 3> Opcodes;
+      // f16 with zvfhmin and bf16 with zvfbfmin
+      if (LT.second.getVectorElementType() == MVT::bf16 &&
+          ST->hasVInstructionsBF16Minimal())
+        Opcodes = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFSQRT_V,
+                   RISCV::VFNCVTBF16_F_F_W};
+      else if (LT.second.getVectorElementType() == MVT::f16 &&
+               !ST->hasVInstructionsF16())
+        Opcodes = {RISCV::VFWCVT_F_F_V, RISCV::VFSQRT_V, RISCV::VFNCVT_F_F_W};
+      else
+        Opcodes = {RISCV::VFSQRT_V};
+      return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
     }
     break;
   }
diff --git a/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll b/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
index 9eb06a07f135fa..8ff70453194381 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
@@ -5,14 +5,14 @@
 define void @fabs() {
 ; CHECK-LABEL: 'fabs'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.fabs.bf16(bfloat undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.fabs.v4bf16(<4 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x bfloat> @llvm.fabs.v16bf16(<16 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.fabs.nxv2bf16(<vscale x 2 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.fabs.nxv4bf16(<vscale x 4 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.fabs.nxv8bf16(<vscale x 8 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.fabs.nxv16bf16(<vscale x 16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call <4 x bfloat> @llvm.fabs.v4bf16(<4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = call <16 x bfloat> @llvm.fabs.v16bf16(<16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.fabs.nxv2bf16(<vscale x 2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.fabs.nxv4bf16(<vscale x 4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.fabs.nxv8bf16(<vscale x 8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.fabs.nxv16bf16(<vscale x 16 x bfloat> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call float @llvm.fabs.f32(float undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
@@ -66,17 +66,29 @@ define void @fabs() {
 }
 
 define void @fabs_f16() {
-; CHECK-LABEL: 'fabs_f16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fabs.f16(half undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVFH-LABEL: 'fabs_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fabs.f16(half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fabs_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fabs.f16(half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %8 = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %9 = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call half @llvm.fabs.f16(half undef)
   call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll b/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll
index 446627f6bf3c0e..d3ceb162c13364 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll
@@ -5,14 +5,14 @@
 define void @sqrt() {
 ; CHECK-LABEL: 'sqrt'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call bfloat @llvm.sqrt.bf16(bfloat undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.sqrt.v2bf16(<2 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.sqrt.v4bf16(<4 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.sqrt.nxv2bf16(<vscale x 2 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.sqrt.nxv4bf16(<vscale x 4 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.sqrt.nxv8bf16(<vscale x 8 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.sqrt.nxv16bf16(<vscale x 16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call <2 x bfloat> @llvm.sqrt.v2bf16(<2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call <4 x bfloat> @llvm.sqrt.v4bf16(<4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.sqrt.nxv2bf16(<vscale x 2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.sqrt.nxv4bf16(<vscale x 4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.sqrt.nxv8bf16(<vscale x 8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.sqrt.nxv16bf16(<vscale x 16 x bfloat> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call float @llvm.sqrt.f32(float undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
@@ -66,17 +66,29 @@ define void @sqrt() {
 }
 
 define void @sqrt_f16() {
-; CHECK-LABEL: 'sqrt_f16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.sqrt.f16(half undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.sqrt.v2f16(<2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVFH-LABEL: 'sqrt_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.sqrt.f16(half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.sqrt.v2f16(<2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'sqrt_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.sqrt.f16(half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call <2 x half> @llvm.sqrt.v2f16(<2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %5 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %8 = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %9 = call <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call half @llvm.sqrt.f16(half undef)
   call <2 x half> @llvm.sqrt.v2f16(<2 x half> undef)

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

topperc · 2024-12-09T21:57:29Z

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

-      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
+      SmallVector<unsigned, 3> Opcodes;
+      // f16 with zvfhmin and bf16 with zvfbfmin
+      if (LT.second.getVectorElementType() == MVT::bf16)


Do we need to account for <vscale x 32 x f16> and <vscale x 32 x bf16> needing to be split before we can widen it? The widening of those types would produce an LMUL=16 value so we have to split first.

It seems is legal for <vscale x 32 x f16> and <vscale x 32 x bf16>, at now LT.first =1, but from asssemble really spilt . I'm not sure how to set the conditions for spilt, because in theory this scenario is also a split

cost of vscale x 16 x f16 is 12, whther cost of vscale x 32 x f16 is 24 and vscale x 64 x f16 is 48????

formula：

Because the SpiltOpcodes == Opcodes, (LT.first - 1 ) * getRISCVInstructionCost(SpiltOpcodes , LT.second, CostKind) + getRISCVInstructionCost(Opcodes, LT.second, CostKind) ----> LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind)

The real issue is that the vfsqrt cost needs to use an f32 type and that type needs to be split if it isn't legal.

It has been custom and has not used f32 type. So I get the LT.first == 1 (LT = getTypeLegalizationCost(RetTy)), and get LK.first is TypeLegal (TargetLoweringBase::LegalizeKind LK = getTLI()->getTypeConversion(C, MTy);)

From the following it seems that the cost calculation is correct??? （cost of vscale x 16 x f16 is 12, whther cost of vscale x 32 x f16 is 24 ）

I don't think 12 is the correct cost for <vscale x 16 x f16>. That cost did not use f32 for the vsqrt.v cost.

The cost for <vscale x 4 x bfloat> should be at least 4 and possibly 5 or 6, not 3. There's a vfwcvt.f.f.v from EEW=16 EMUL=m1 to EEW=32 EMUL=m2. That's either a cost of 1 if the hardware can produce an EMUL=2 result in 1 cycle or its 2 if it takes 2 cycles to produce a wide result.

Then there's a EEW=32 EMUL=2 vfsqrt.v. That should be a cost of 2 since the EMUL is 2.

Then there's a vfncvt.f.f.w from EEW=32 EMUL=2 to EEW=16 EMUL=1. That's either 1 cycle if the hardware can narrow EMUL=2 in 1 cycle. Or it's a cost of 2.

So the total cost is either (1 + 2 + 1) or (2 + 2 + 1) or (1 + 2 + 2) or (2 + 2 + 2).

The cost use f32 for the vsqrt.v cost. Split have been cost * 2 in the getTypeLegalizationCost.

topperc · 2024-12-10T18:51:58Z

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

+        Opcodes = {RISCV::VFWCVT_F_F_V, RISCV::VFSQRT_V, RISCV::VFNCVT_F_F_W};
+      else
+        Opcodes = {RISCV::VFSQRT_V};
+      return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);


When widening, the cost for the VFSQRT_V needs to use the f32 type not the f16 type. The LMUL is twice as large so that needs to be accounted for.

I'm not sure what LMUL should be used for VFWCVT_F_F_V and VFNCVT_F_F_W. @lukel97 @arcbbb ?

I think both VFWCVT_F_F_V and VFNCVT_F_F_W should use the widened LMUL type, since presumably we should cost things based on the largest EMUL in the instruction, and that seems to match the spacemit-x60? https://camel-cdr.github.io/rvv-bench-results/bpi_f3/index.html

The easiest thing is probably just to update LT.second's element type to f32 to double the LMUL.

I'm not sure it's worthwhile trying to be fancy and handle it in getRISCVInstructionCost

I update LT.second's element type to f32 to double the LMUL， pls review

getRISCVInstructionCost uses the vtype associated with each instruction:

For vfwcvt.f.f.v, the source is (f16, LMUL1), the destination is (f32, LMUL2), and the vtype is (f16, LMUL).

For vfncvt.f.f.w, the source is (f32, LMUL2), the destination is (f16, LMUL1), and the vtype is (f16, LMUL).

Because the number of uOps required for these operations varies by hardware and are not defined in the ISA, getRISCVInstructionCost takes only the vtype into account.

I want to determine the following questions：

should VFWCVT_F_F_V and VFNCVT_F_F_W use the widened LMUL type？

should VFWCVT_F_F_V and VFNCVT_F_F_W use the type of f16

I update all opcode Vtype as the vfsqrt type

github-actions · 2024-12-16T06:38:41Z

✅ With the latest revision this PR passed the undef deprecator.

topperc · 2024-12-16T17:41:22Z

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

+      if (LT.second.getVectorElementType() == MVT::bf16) {
+        Opcodes = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFSQRT_V,
+                   RISCV::VFNCVTBF16_F_F_W};
+        NVT = TLI->getTypeToPromoteTo(ISD::FSQRT, NVT);


I don't think this works for <vscale x 32 x bfloat>. We use setOperationAction(ISD::FSQRT, MVT::v32bf16, Custom) instead of Promote.

This is indeed a problem. Is the cost of <vscale x 32 x bfloat> double the cost of <vscale x 16x bfloat>？

topperc · 2024-12-16T17:41:55Z

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

+      } else if (LT.second.getVectorElementType() == MVT::f16 &&
+                 !ST->hasVInstructionsF16()) {
+        Opcodes = {RISCV::VFWCVT_F_F_V, RISCV::VFSQRT_V, RISCV::VFNCVT_F_F_W};
+        NVT = TLI->getTypeToPromoteTo(ISD::FSQRT, NVT);


This doesn't work for <vscale x 32 x half>

Only LUML = 8， bf16/f16 are not promoted. Can I handle this scenario alone, or do you have any better suggestions?

topperc · 2024-12-27T02:45:44Z

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

-        NVT = TLI->getTypeToPromoteTo(ISD::FSQRT, NVT);
+        if (LT.second == MVT::nxv32f16) {
+          Opcodes = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
+                     RISCV::VFSQRT_V,     RISCV::VFSQRT_V,


NVT needs to be changed to v16f32.

Can you help me see the code check? I feel that there is no problem with the code, but the code check fails.

@LiqinWeng Are you trying to compute the cost for ISAs without f16 support as (vfwcvt, nxvf16) + (vfsqrt, nxvf32) + (vfncvt, nxv*f16)? From the current code, it doesn’t seem like that’s what’s being implemented.

Under what circumstances will it occur: (vfwcvt, nxvf16) + (vfsqrt, nxvf32) + (vfncvt, nxv*f16)？？？I don't quite understand ‘ISAs without f16’？ do you means the mattr only support zvfh？？？

There's a vfwcvt.f.f.v from EEW=16 EMUL=m1 to EEW=32 EMUL=m2; Shouldn't the type of vfwcvt be nxvf32, the dest eew is 32

@LiqinWeng vfwcvt is a widening arithmetic instruction. Based on the spec description, the destination EEW is 2 * SEW, so the vtype is nxvf16 and the destination is nxvf32.

10.2. Widening Vector Arithmetic Instructions A few vector arithmetic instructions are defined to be widening operations where the destination vector register group has EEW=2*SEW and EMUL=2*LMUL.

On the other hand, for narrowing arithmetic instruction such as vfncvt, the destination EEW is SEW, so both the vtype and the destination are nxvf16.

Is this the case with this instruction(vfwcvtbf16.f.f.v )?

hmm.. I think vfwcvtbf16.f.f.v is the same with vfwcvt.f.f.v.

4.4. vfwcvtbf16.f.f.v Synopsis Vector convert BF16 to FP32 Description This instruction is similar to vfwcvt.f.f.v which converts a floating-point value in an SEW-width format into a 2*SEW-width format. However, here the SEW-width format is limited to BF16.

topperc · 2025-01-02T16:10:13Z

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

+          Opcodes = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
+                     RISCV::VFSQRT_V,     RISCV::VFSQRT_V,
+                     RISCV::VFNCVT_F_F_W, RISCV::VFNCVT_F_F_W};
+          NVT = TLI->getTypeToPromoteTo(ISD::FSQRT,


You can't use getTypeToPromoteTo. That only works when LegalizeVectorOps/LegalizeDAG does the promotion via setOperationAction(ISD::FSQRT, VT, Promote). This case uses setOperationAction(ISD::FSQRT, MVT::nxv32f16, Custom) and there is custom code in RISCVISelLowering.cpp to do the splitting. You'll need to hardcode the type has MVT::nxv16f32.

arcbbb · 2025-01-10T06:51:48Z

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

+        if (LT.second == MVT::nxv32bf16) {
+          ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVTBF16_F_F_V,
+                    RISCV::VFNCVTBF16_F_F_W, RISCV::VFNCVTBF16_F_F_W};
+          FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
+          ConvType = MVT::nxv16f16;
+          FsqrtType = MVT::nxv16f32;


To handle nxv32bf16, you need 2 iterations of (vfwcvt nxv16bf16), (vfsqrt nxv16f32), and (vfncvt nxv16bf16).
The cost could therefore be LT.first * 2 * (vfwcvt + vfsqrt + vfncvt).

the ConvOp have been spilt, so neednt * 2

Ah, I see now. I overlooked the duplicated opcode.

arcbbb · 2025-01-10T06:53:01Z

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

+        if (LT.second == MVT::nxv32f16) {
+          ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
+                    RISCV::VFNCVT_F_F_W, RISCV::VFNCVT_F_F_W};
+          FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
+          ConvType = MVT::nxv16f16;
+          FsqrtType = MVT::nxv16f32;


the ConvOp have been spilt, so neednt * 2 , same as FsqrtOp

arcbbb · 2025-01-10T06:56:21Z

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

+        } else {
+          ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFNCVTBF16_F_F_W};
+          FsqrtOp = {RISCV::VFSQRT_V};
+          FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);


I am not sure the reason getTypeToPromoteTo is needed here and below.

It converts the f16 type to the f32 type as long as there is a setOperationAction for ISD::FSQRT call in RISCVISelLowering.cpp with Promote.

Understood. Thanks for the explanation!

arcbbb

LGTM.

)

LiqinWeng requested a review from lukel97 December 4, 2024 09:25

llvmbot added backend:RISC-V llvm:analysis Includes value tracking, cost tables and constant folding labels Dec 4, 2024

topperc reviewed Dec 7, 2024

View reviewed changes

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp Outdated Show resolved Hide resolved

topperc reviewed Dec 9, 2024

View reviewed changes

topperc reviewed Dec 10, 2024

View reviewed changes

LiqinWeng added 3 commits December 16, 2024 19:36

[RISCV][CostModel] Add cost for fabs/fsqrt of type bf16/f16

48dfe67

Remove the condition for sqrts

e682e00

Promote the bf16/f16 to fp32 to calculate the cost

86bd2f7

LiqinWeng force-pushed the fabs-fsqrt-f16-bf16-cost branch from 9a5f0c1 to 86bd2f7 Compare December 16, 2024 11:37

topperc reviewed Dec 16, 2024

View reviewed changes

LiqinWeng added 2 commits December 23, 2024 20:22

Merge branch 'main' into fabs-fsqrt-f16-bf16-cost

0b5f44e

Add cost for fsqrt of type nxv32[b]f16

1fe4655

topperc reviewed Dec 27, 2024

View reviewed changes

update nxv32[b]f16 to nxv16f32

b7c046f

topperc reviewed Jan 2, 2025

View reviewed changes

LiqinWeng added 3 commits January 5, 2025 14:40

Merge branch 'main' into fabs-fsqrt-f16-bf16-cost

3bb8859

fix the coments and clang-format for test

1a8c67e

fix the vtype for fcvt

5d2432f

arcbbb reviewed Jan 10, 2025

View reviewed changes

arcbbb approved these changes Jan 10, 2025

View reviewed changes

LiqinWeng merged commit 98e5962 into llvm:main Jan 10, 2025
8 checks passed

BaiXilin pushed a commit to BaiXilin/llvm-fix-vnni-instr-types that referenced this pull request Jan 12, 2025

[RISCV][CostModel] Add cost for fabs/fsqrt of type bf16/f16 (llvm#118608

8773ba0

)

[RISCV][CostModel] Add cost for fabs/fsqrt of type bf16/f16 #118608

[RISCV][CostModel] Add cost for fabs/fsqrt of type bf16/f16 #118608

Uh oh!

Conversation

LiqinWeng commented Dec 4, 2024

Uh oh!

llvmbot commented Dec 4, 2024

Uh oh!

llvmbot commented Dec 4, 2024

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

LiqinWeng Dec 10, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

LiqinWeng Dec 11, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

github-actions bot commented Dec 16, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

topperc Dec 16, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

arcbbb Jan 7, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

topperc Jan 2, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

LiqinWeng Dec 10, 2024 •

edited

Loading

LiqinWeng Dec 11, 2024 •

edited

Loading

github-actions bot commented Dec 16, 2024 •

edited

Loading

topperc Dec 16, 2024 •

edited

Loading

arcbbb Jan 7, 2025 •

edited

Loading

topperc Jan 2, 2025 •

edited

Loading

arcbbb Jan 10, 2025 •

edited

Loading

LiqinWeng Jan 10, 2025 •

edited

Loading