@@ -1061,6 +1061,9 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
1061
1061
DstLT.second .getSizeInBits ()))
1062
1062
return BaseT::getCastInstrCost (Opcode, Dst, Src, CCH, CostKind, I);
1063
1063
1064
+ // The split cost is handled by the base getCastInstrCost
1065
+ assert ((SrcLT.first == 1 ) && (DstLT.first == 1 ) && " Illegal type" );
1066
+
1064
1067
int ISD = TLI->InstructionOpcodeToISD (Opcode);
1065
1068
assert (ISD && " Invalid opcode" );
1066
1069
@@ -1118,34 +1121,89 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
1118
1121
return Cost;
1119
1122
}
1120
1123
case ISD::FP_TO_SINT:
1121
- case ISD::FP_TO_UINT:
1122
- // For fp vector to mask, we use:
1123
- // vfncvt.rtz.x.f.w v9, v8
1124
- // vand.vi v8, v9, 1
1125
- // vmsne.vi v0, v8, 0
1126
- if (Dst->getScalarSizeInBits () == 1 )
1127
- return 3 ;
1128
-
1129
- if (std::abs (PowDiff) <= 1 )
1130
- return 1 ;
1124
+ case ISD::FP_TO_UINT: {
1125
+ unsigned IsSigned = ISD == ISD::FP_TO_SINT;
1126
+ unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
1127
+ unsigned FWCVT =
1128
+ IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
1129
+ unsigned FNCVT =
1130
+ IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
1131
+ unsigned SrcEltSize = Src->getScalarSizeInBits ();
1132
+ unsigned DstEltSize = Dst->getScalarSizeInBits ();
1133
+ InstructionCost Cost = 0 ;
1134
+ if ((SrcEltSize == 16 ) &&
1135
+ (!ST->hasVInstructionsF16 () || ((DstEltSize / 2 ) > SrcEltSize))) {
1136
+ // If the target only supports zvfhmin, or this is an fp16-to-i64 conversion,
1137
+ // pre-widen the source to f32 first and then convert f32 to the integer type.
1138
+ VectorType *VecF32Ty =
1139
+ VectorType::get (Type::getFloatTy (Dst->getContext ()),
1140
+ cast<VectorType>(Dst)->getElementCount ());
1141
+ std::pair<InstructionCost, MVT> VecF32LT =
1142
+ getTypeLegalizationCost (VecF32Ty);
1143
+ Cost +=
1144
+ VecF32LT.first * getRISCVInstructionCost (RISCV::VFWCVT_F_F_V,
1145
+ VecF32LT.second , CostKind);
1146
+ Cost += getCastInstrCost (Opcode, Dst, VecF32Ty, CCH, CostKind, I);
1147
+ return Cost;
1148
+ }
1149
+ if (DstEltSize == SrcEltSize)
1150
+ Cost += getRISCVInstructionCost (FCVT, DstLT.second , CostKind);
1151
+ else if (DstEltSize > SrcEltSize)
1152
+ Cost += getRISCVInstructionCost (FWCVT, DstLT.second , CostKind);
1153
+ else { // (SrcEltSize > DstEltSize)
1154
+ // First do a narrowing conversion to an integer half the size, then
1155
+ // truncate if needed.
1156
+ MVT ElementVT = MVT::getIntegerVT (SrcEltSize / 2 );
1157
+ MVT VecVT = DstLT.second .changeVectorElementType (ElementVT);
1158
+ Cost += getRISCVInstructionCost (FNCVT, VecVT, CostKind);
1159
+ if ((SrcEltSize / 2 ) > DstEltSize) {
1160
+ Type *VecTy = EVT (VecVT).getTypeForEVT (Dst->getContext ());
1161
+ Cost +=
1162
+ getCastInstrCost (Instruction::Trunc, Dst, VecTy, CCH, CostKind, I);
1163
+ }
1164
+ }
1165
+ return Cost;
1166
+ }
1167
+ case ISD::SINT_TO_FP:
1168
+ case ISD::UINT_TO_FP: {
1169
+ unsigned IsSigned = ISD == ISD::SINT_TO_FP;
1170
+ unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
1171
+ unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
1172
+ unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
1173
+ unsigned SrcEltSize = Src->getScalarSizeInBits ();
1174
+ unsigned DstEltSize = Dst->getScalarSizeInBits ();
1131
1175
1132
- // Counts of narrow/widen instructions.
1133
- return std::abs (PowDiff);
1176
+ InstructionCost Cost = 0 ;
1177
+ if ((DstEltSize == 16 ) &&
1178
+ (!ST->hasVInstructionsF16 () || ((SrcEltSize / 2 ) > DstEltSize))) {
1179
+ // If the target only supports zvfhmin, or this is an i64-to-fp16 conversion,
1180
+ // the source is first converted to f32 and then narrowed from f32 to f16.
1181
+ VectorType *VecF32Ty =
1182
+ VectorType::get (Type::getFloatTy (Dst->getContext ()),
1183
+ cast<VectorType>(Dst)->getElementCount ());
1184
+ std::pair<InstructionCost, MVT> VecF32LT =
1185
+ getTypeLegalizationCost (VecF32Ty);
1186
+ Cost += getCastInstrCost (Opcode, VecF32Ty, Src, CCH, CostKind, I);
1187
+ Cost += VecF32LT.first * getRISCVInstructionCost (RISCV::VFNCVT_F_F_W,
1188
+ DstLT.second , CostKind);
1189
+ return Cost;
1190
+ }
1134
1191
1135
- case ISD::SINT_TO_FP:
1136
- case ISD::UINT_TO_FP:
1137
- // For mask vector to fp, we should use the following instructions:
1138
- // vmv.v.i v8, 0
1139
- // vmerge.vim v8, v8, -1, v0
1140
- // vfcvt.f.x.v v8, v8
1141
- if (Src->getScalarSizeInBits () == 1 )
1142
- return 3 ;
1143
-
1144
- if (std::abs (PowDiff) <= 1 )
1145
- return 1 ;
1146
- // Backend could lower (v[sz]ext i8 to double) to vfcvt(v[sz]ext.f8 i8),
1147
- // so it only need two conversion.
1148
- return 2 ;
1192
+ if (DstEltSize == SrcEltSize)
1193
+ Cost += getRISCVInstructionCost (FCVT, DstLT.second , CostKind);
1194
+ else if (DstEltSize > SrcEltSize) {
1195
+ if ((DstEltSize / 2 ) > SrcEltSize) {
1196
+ VectorType *VecTy =
1197
+ VectorType::get (IntegerType::get (Dst->getContext (), DstEltSize / 2 ),
1198
+ cast<VectorType>(Dst)->getElementCount ());
1199
+ unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
1200
+ Cost += getCastInstrCost (Op, VecTy, Src, CCH, CostKind, I);
1201
+ }
1202
+ Cost += getRISCVInstructionCost (FWCVT, DstLT.second , CostKind);
1203
+ } else
1204
+ Cost += getRISCVInstructionCost (FNCVT, DstLT.second , CostKind);
1205
+ return Cost;
1206
+ }
1149
1207
}
1150
1208
return BaseT::getCastInstrCost (Opcode, Dst, Src, CCH, CostKind, I);
1151
1209
}
0 commit comments