llvm · davemgreen · Dec 11, 2024 · Dec 9, 2024
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2800,6 +2800,31 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
       {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6},
       {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6},
 
+      // FP Ext and trunc
+      {ISD::FP_EXTEND, MVT::f64, MVT::f32, 1},     // fcvt
+      {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f32, 1}, // fcvtl
+      {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 2}, // fcvtl+fcvtl2
+      //   FP16
+      {ISD::FP_EXTEND, MVT::f32, MVT::f16, 1},     // fcvt
+      {ISD::FP_EXTEND, MVT::f64, MVT::f16, 1},     // fcvt
+      {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1}, // fcvtl
+      {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 2}, // fcvtl+fcvtl2
+      {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2}, // fcvtl+fcvtl
+      {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3}, // fcvtl+fcvtl2+fcvtl
+      {ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6}, // 2 * fcvtl+fcvtl2+fcvtl
+      // FP trunc (FP_ROUND)
+      {ISD::FP_ROUND, MVT::f32, MVT::f64, 1},     // fcvt
+      {ISD::FP_ROUND, MVT::v2f32, MVT::v2f64, 1}, // fcvtn
+      {ISD::FP_ROUND, MVT::v4f32, MVT::v4f64, 2}, // fcvtn+fcvtn2
+      //   FP16
+      {ISD::FP_ROUND, MVT::f16, MVT::f32, 1},     // fcvt
+      {ISD::FP_ROUND, MVT::f16, MVT::f64, 1},     // fcvt
+      {ISD::FP_ROUND, MVT::v4f16, MVT::v4f32, 1}, // fcvtn
+      {ISD::FP_ROUND, MVT::v8f16, MVT::v8f32, 2}, // fcvtn+fcvtn2
+      {ISD::FP_ROUND, MVT::v2f16, MVT::v2f64, 2}, // fcvtn+fcvtn
+      {ISD::FP_ROUND, MVT::v4f16, MVT::v4f64, 3}, // fcvtn+fcvtn2+fcvtn
+      {ISD::FP_ROUND, MVT::v8f16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+fcvtn
+
       // LowerVectorINT_TO_FP:
       {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},
       {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1},

diff --git a/llvm/test/Analysis/CostModel/AArch64/cast.ll b/llvm/test/Analysis/CostModel/AArch64/cast.ll
@@ -274,34 +274,34 @@ define i32 @casts_no_users() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r69 = uitofp i64 undef to double
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %truncf64f16 = fptrunc double undef to half
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %truncv2f64f16 = fptrunc <2 x double> undef to <2 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %truncv2f64f16 = fptrunc <2 x double> undef to <2 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %truncv4f64f16 = fptrunc <4 x double> undef to <4 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %truncv8f64f16 = fptrunc <8 x double> undef to <8 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %truncv16f64f16 = fptrunc <16 x double> undef to <16 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %truncv8f64f16 = fptrunc <8 x double> undef to <8 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %truncv16f64f16 = fptrunc <16 x double> undef to <16 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %truncv32f16 = fptrunc float undef to half
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %truncv2f32f16 = fptrunc <2 x float> undef to <2 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %truncv4f32f16 = fptrunc <4 x float> undef to <4 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %truncv8f32f16 = fptrunc <8 x float> undef to <8 x half>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %truncv16f32f16 = fptrunc <16 x float> undef to <16 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %truncv8f32f16 = fptrunc <8 x float> undef to <8 x half>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %truncv16f32f16 = fptrunc <16 x float> undef to <16 x half>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r86 = fpext <2 x float> undef to <2 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %r87 = fpext <4 x float> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %r88 = fpext <8 x float> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %r89 = fpext <16 x float> undef to <16 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r87 = fpext <4 x float> undef to <4 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %r88 = fpext <8 x float> undef to <8 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %r89 = fpext <16 x float> undef to <16 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %extf16f32 = fpext half undef to float
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f32 = fpext <2 x half> undef to <2 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %extv4f16f32 = fpext <4 x half> undef to <4 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %extv8f16f32 = fpext <8 x half> undef to <8 x float>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %extv16f16f32 = fpext <16 x half> undef to <16 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %extv8f16f32 = fpext <8 x half> undef to <8 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %extv16f16f32 = fpext <16 x half> undef to <16 x float>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %extf16f64 = fpext half undef to double
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f64 = fpext <2 x half> undef to <2 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %extv2f16f64 = fpext <2 x half> undef to <2 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %extv4f16f64 = fpext <4 x half> undef to <4 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %extv8f16f64 = fpext <8 x half> undef to <8 x double>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %extv16f16f64 = fpext <16 x half> undef to <16 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %extv8f16f64 = fpext <8 x half> undef to <8 x double>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %extv16f16f64 = fpext <16 x half> undef to <16 x double>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8>

diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll b/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll
@@ -600,14 +600,14 @@ define i32 @casts_no_users() {
 ; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r69 = uitofp i64 undef to double
 ; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float
 ; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float>
-; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float>
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float>
 ; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double
 ; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r86 = fpext <2 x float> undef to <2 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %r87 = fpext <4 x float> undef to <4 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %r88 = fpext <8 x float> undef to <8 x double>
-; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %r89 = fpext <16 x float> undef to <16 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %r87 = fpext <4 x float> undef to <4 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %r88 = fpext <8 x float> undef to <8 x double>
+; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %r89 = fpext <16 x float> undef to <16 x double>
 ; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1>
 ; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1>
 ; CHECK-SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8>