Skip to content

Commit f737df7

Browse files
authored
[AArch64][CostModel] Increase the cost of illegal SVE int-to-fp converts (#130756)
If a scalable vector uitofp or sitofp effectively extends the size of each element as part of the conversion, the AArch64 backend may need to plant multiple unpacks before converting. Increase the cost in those cases to account for this.
1 parent 36b3606 commit f737df7

File tree

3 files changed

+402
-64
lines changed

3 files changed

+402
-64
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2811,6 +2811,17 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
28112811
BF16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
28122812
return AdjustCost(Entry->Cost);
28132813

2814+
// Symbolic constants for the SVE sitofp/uitofp entries in the table below.
2815+
// The cost of unpacking twice is artificially increased for now in order
2816+
// to avoid regressions against NEON, which will use tbl instructions directly
2817+
// instead of multiple layers of [s|u]unpk[lo|hi].
2818+
// We use the unpacks in cases where the destination type is illegal and
2819+
// requires splitting of the input, even if the input type itself is legal.
2820+
const unsigned int SVE_EXT_COST = 1;
2821+
const unsigned int SVE_FCVT_COST = 1;
2822+
const unsigned int SVE_UNPACK_ONCE = 4;
2823+
const unsigned int SVE_UNPACK_TWICE = 16;
2824+
28142825
static const TypeConversionCostTblEntry ConversionTbl[] = {
28152826
{ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1}, // xtn
28162827
{ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1}, // xtn
@@ -2936,6 +2947,42 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
29362947
{ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1},
29372948
{ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1},
29382949

2950+
// SVE: to nxv2f16
2951+
{ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i8,
2952+
SVE_EXT_COST + SVE_FCVT_COST},
2953+
{ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i16, SVE_FCVT_COST},
2954+
{ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i32, SVE_FCVT_COST},
2955+
{ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i64, SVE_FCVT_COST},
2956+
{ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i8,
2957+
SVE_EXT_COST + SVE_FCVT_COST},
2958+
{ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i16, SVE_FCVT_COST},
2959+
{ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i32, SVE_FCVT_COST},
2960+
{ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i64, SVE_FCVT_COST},
2961+
2962+
// SVE: to nxv4f16
2963+
{ISD::SINT_TO_FP, MVT::nxv4f16, MVT::nxv4i8,
2964+
SVE_EXT_COST + SVE_FCVT_COST},
2965+
{ISD::SINT_TO_FP, MVT::nxv4f16, MVT::nxv4i16, SVE_FCVT_COST},
2966+
{ISD::SINT_TO_FP, MVT::nxv4f16, MVT::nxv4i32, SVE_FCVT_COST},
2967+
{ISD::UINT_TO_FP, MVT::nxv4f16, MVT::nxv4i8,
2968+
SVE_EXT_COST + SVE_FCVT_COST},
2969+
{ISD::UINT_TO_FP, MVT::nxv4f16, MVT::nxv4i16, SVE_FCVT_COST},
2970+
{ISD::UINT_TO_FP, MVT::nxv4f16, MVT::nxv4i32, SVE_FCVT_COST},
2971+
2972+
// SVE: to nxv8f16
2973+
{ISD::SINT_TO_FP, MVT::nxv8f16, MVT::nxv8i8,
2974+
SVE_EXT_COST + SVE_FCVT_COST},
2975+
{ISD::SINT_TO_FP, MVT::nxv8f16, MVT::nxv8i16, SVE_FCVT_COST},
2976+
{ISD::UINT_TO_FP, MVT::nxv8f16, MVT::nxv8i8,
2977+
SVE_EXT_COST + SVE_FCVT_COST},
2978+
{ISD::UINT_TO_FP, MVT::nxv8f16, MVT::nxv8i16, SVE_FCVT_COST},
2979+
2980+
// SVE: to nxv16f16
2981+
{ISD::SINT_TO_FP, MVT::nxv16f16, MVT::nxv16i8,
2982+
SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
2983+
{ISD::UINT_TO_FP, MVT::nxv16f16, MVT::nxv16i8,
2984+
SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
2985+
29392986
// Complex: to v2f32
29402987
{ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
29412988
{ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3},
@@ -2944,18 +2991,56 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
29442991
{ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3},
29452992
{ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2},
29462993

2994+
// SVE: to nxv2f32
2995+
{ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i8,
2996+
SVE_EXT_COST + SVE_FCVT_COST},
2997+
{ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i16, SVE_FCVT_COST},
2998+
{ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i32, SVE_FCVT_COST},
2999+
{ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i64, SVE_FCVT_COST},
3000+
{ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i8,
3001+
SVE_EXT_COST + SVE_FCVT_COST},
3002+
{ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i16, SVE_FCVT_COST},
3003+
{ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i32, SVE_FCVT_COST},
3004+
{ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i64, SVE_FCVT_COST},
3005+
29473006
// Complex: to v4f32
29483007
{ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4},
29493008
{ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2},
29503009
{ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3},
29513010
{ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2},
29523011

3012+
// SVE: to nxv4f32
3013+
{ISD::SINT_TO_FP, MVT::nxv4f32, MVT::nxv4i8,
3014+
SVE_EXT_COST + SVE_FCVT_COST},
3015+
{ISD::SINT_TO_FP, MVT::nxv4f32, MVT::nxv4i16, SVE_FCVT_COST},
3016+
{ISD::SINT_TO_FP, MVT::nxv4f32, MVT::nxv4i32, SVE_FCVT_COST},
3017+
{ISD::UINT_TO_FP, MVT::nxv4f32, MVT::nxv4i8,
3018+
SVE_EXT_COST + SVE_FCVT_COST},
3019+
{ISD::UINT_TO_FP, MVT::nxv4f32, MVT::nxv4i16, SVE_FCVT_COST},
3020+
{ISD::UINT_TO_FP, MVT::nxv4f32, MVT::nxv4i32, SVE_FCVT_COST},
3021+
29533022
// Complex: to v8f32
29543023
{ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10},
29553024
{ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4},
29563025
{ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10},
29573026
{ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4},
29583027

3028+
// SVE: to nxv8f32
3029+
{ISD::SINT_TO_FP, MVT::nxv8f32, MVT::nxv8i8,
3030+
SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3031+
{ISD::SINT_TO_FP, MVT::nxv8f32, MVT::nxv8i16,
3032+
SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3033+
{ISD::UINT_TO_FP, MVT::nxv8f32, MVT::nxv8i8,
3034+
SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3035+
{ISD::UINT_TO_FP, MVT::nxv8f32, MVT::nxv8i16,
3036+
SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3037+
3038+
// SVE: to nxv16f32
3039+
{ISD::SINT_TO_FP, MVT::nxv16f32, MVT::nxv16i8,
3040+
SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3041+
{ISD::UINT_TO_FP, MVT::nxv16f32, MVT::nxv16i8,
3042+
SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3043+
29593044
// Complex: to v16f32
29603045
{ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21},
29613046
{ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21},
@@ -2968,10 +3053,46 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
29683053
{ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4},
29693054
{ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2},
29703055

3056+
// SVE: to nxv2f64
3057+
{ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i8,
3058+
SVE_EXT_COST + SVE_FCVT_COST},
3059+
{ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i16, SVE_FCVT_COST},
3060+
{ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i32, SVE_FCVT_COST},
3061+
{ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i64, SVE_FCVT_COST},
3062+
{ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i8,
3063+
SVE_EXT_COST + SVE_FCVT_COST},
3064+
{ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i16, SVE_FCVT_COST},
3065+
{ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i32, SVE_FCVT_COST},
3066+
{ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i64, SVE_FCVT_COST},
3067+
29713068
// Complex: to v4f64
29723069
{ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 4},
29733070
{ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 4},
29743071

3072+
// SVE: to nxv4f64
3073+
{ISD::SINT_TO_FP, MVT::nxv4f64, MVT::nxv4i8,
3074+
SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3075+
{ISD::SINT_TO_FP, MVT::nxv4f64, MVT::nxv4i16,
3076+
SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3077+
{ISD::SINT_TO_FP, MVT::nxv4f64, MVT::nxv4i32,
3078+
SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3079+
{ISD::UINT_TO_FP, MVT::nxv4f64, MVT::nxv4i8,
3080+
SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3081+
{ISD::UINT_TO_FP, MVT::nxv4f64, MVT::nxv4i16,
3082+
SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3083+
{ISD::UINT_TO_FP, MVT::nxv4f64, MVT::nxv4i32,
3084+
SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3085+
3086+
// SVE: to nxv8f64
3087+
{ISD::SINT_TO_FP, MVT::nxv8f64, MVT::nxv8i8,
3088+
SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3089+
{ISD::SINT_TO_FP, MVT::nxv8f64, MVT::nxv8i16,
3090+
SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3091+
{ISD::UINT_TO_FP, MVT::nxv8f64, MVT::nxv8i8,
3092+
SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3093+
{ISD::UINT_TO_FP, MVT::nxv8f64, MVT::nxv8i16,
3094+
SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3095+
29753096
// LowerVectorFP_TO_INT
29763097
{ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1},
29773098
{ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1},

0 commit comments

Comments
 (0)