Skip to content

Commit 837ee5b

Browse files
authored
[RISCV][TTI] Scale the cost of FP-Int conversion with LMUL (#87506)
Widening or narrowing the source data type to match the destination data type may require multiple steps. To model these costs, this patch derives the intermediate types by following the logic in RISCVTargetLowering::lowerVPFPIntConvOp.
1 parent 5fe852e commit 837ee5b

File tree

3 files changed

+1720
-1662
lines changed

3 files changed

+1720
-1662
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 84 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1061,6 +1061,9 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
10611061
DstLT.second.getSizeInBits()))
10621062
return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
10631063

1064+
// The split cost is handled by the base getCastInstrCost
1065+
assert((SrcLT.first == 1) && (DstLT.first == 1) && "Illegal type");
1066+
10641067
int ISD = TLI->InstructionOpcodeToISD(Opcode);
10651068
assert(ISD && "Invalid opcode");
10661069

@@ -1118,34 +1121,89 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
11181121
return Cost;
11191122
}
11201123
case ISD::FP_TO_SINT:
1121-
case ISD::FP_TO_UINT:
1122-
// For fp vector to mask, we use:
1123-
// vfncvt.rtz.x.f.w v9, v8
1124-
// vand.vi v8, v9, 1
1125-
// vmsne.vi v0, v8, 0
1126-
if (Dst->getScalarSizeInBits() == 1)
1127-
return 3;
1128-
1129-
if (std::abs(PowDiff) <= 1)
1130-
return 1;
1124+
case ISD::FP_TO_UINT: {
1125+
unsigned IsSigned = ISD == ISD::FP_TO_SINT;
1126+
unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
1127+
unsigned FWCVT =
1128+
IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
1129+
unsigned FNCVT =
1130+
IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
1131+
unsigned SrcEltSize = Src->getScalarSizeInBits();
1132+
unsigned DstEltSize = Dst->getScalarSizeInBits();
1133+
InstructionCost Cost = 0;
1134+
if ((SrcEltSize == 16) &&
1135+
(!ST->hasVInstructionsF16() || ((DstEltSize / 2) > SrcEltSize))) {
1136+
// If the target only supports zvfhmin, or this is an fp16-to-i64 conversion,
1137+
// first widen to f32 and then convert f32 to integer.
1138+
VectorType *VecF32Ty =
1139+
VectorType::get(Type::getFloatTy(Dst->getContext()),
1140+
cast<VectorType>(Dst)->getElementCount());
1141+
std::pair<InstructionCost, MVT> VecF32LT =
1142+
getTypeLegalizationCost(VecF32Ty);
1143+
Cost +=
1144+
VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
1145+
VecF32LT.second, CostKind);
1146+
Cost += getCastInstrCost(Opcode, Dst, VecF32Ty, CCH, CostKind, I);
1147+
return Cost;
1148+
}
1149+
if (DstEltSize == SrcEltSize)
1150+
Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
1151+
else if (DstEltSize > SrcEltSize)
1152+
Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
1153+
else { // (SrcEltSize > DstEltSize)
1154+
// First do a narrowing conversion to an integer half the size, then
1155+
// truncate if needed.
1156+
MVT ElementVT = MVT::getIntegerVT(SrcEltSize / 2);
1157+
MVT VecVT = DstLT.second.changeVectorElementType(ElementVT);
1158+
Cost += getRISCVInstructionCost(FNCVT, VecVT, CostKind);
1159+
if ((SrcEltSize / 2) > DstEltSize) {
1160+
Type *VecTy = EVT(VecVT).getTypeForEVT(Dst->getContext());
1161+
Cost +=
1162+
getCastInstrCost(Instruction::Trunc, Dst, VecTy, CCH, CostKind, I);
1163+
}
1164+
}
1165+
return Cost;
1166+
}
1167+
case ISD::SINT_TO_FP:
1168+
case ISD::UINT_TO_FP: {
1169+
unsigned IsSigned = ISD == ISD::SINT_TO_FP;
1170+
unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
1171+
unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
1172+
unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
1173+
unsigned SrcEltSize = Src->getScalarSizeInBits();
1174+
unsigned DstEltSize = Dst->getScalarSizeInBits();
11311175

1132-
// Counts of narrow/widen instructions.
1133-
return std::abs(PowDiff);
1176+
InstructionCost Cost = 0;
1177+
if ((DstEltSize == 16) &&
1178+
(!ST->hasVInstructionsF16() || ((SrcEltSize / 2) > DstEltSize))) {
1179+
// If the target only supports zvfhmin, or this is an i64-to-fp16 conversion,
1180+
// first convert to f32 and then narrow f32 to f16.
1181+
VectorType *VecF32Ty =
1182+
VectorType::get(Type::getFloatTy(Dst->getContext()),
1183+
cast<VectorType>(Dst)->getElementCount());
1184+
std::pair<InstructionCost, MVT> VecF32LT =
1185+
getTypeLegalizationCost(VecF32Ty);
1186+
Cost += getCastInstrCost(Opcode, VecF32Ty, Src, CCH, CostKind, I);
1187+
Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,
1188+
DstLT.second, CostKind);
1189+
return Cost;
1190+
}
11341191

1135-
case ISD::SINT_TO_FP:
1136-
case ISD::UINT_TO_FP:
1137-
// For mask vector to fp, we should use the following instructions:
1138-
// vmv.v.i v8, 0
1139-
// vmerge.vim v8, v8, -1, v0
1140-
// vfcvt.f.x.v v8, v8
1141-
if (Src->getScalarSizeInBits() == 1)
1142-
return 3;
1143-
1144-
if (std::abs(PowDiff) <= 1)
1145-
return 1;
1146-
// Backend could lower (v[sz]ext i8 to double) to vfcvt(v[sz]ext.f8 i8),
1147-
// so it only need two conversion.
1148-
return 2;
1192+
if (DstEltSize == SrcEltSize)
1193+
Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
1194+
else if (DstEltSize > SrcEltSize) {
1195+
if ((DstEltSize / 2) > SrcEltSize) {
1196+
VectorType *VecTy =
1197+
VectorType::get(IntegerType::get(Dst->getContext(), DstEltSize / 2),
1198+
cast<VectorType>(Dst)->getElementCount());
1199+
unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
1200+
Cost += getCastInstrCost(Op, VecTy, Src, CCH, CostKind, I);
1201+
}
1202+
Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
1203+
} else
1204+
Cost += getRISCVInstructionCost(FNCVT, DstLT.second, CostKind);
1205+
return Cost;
1206+
}
11491207
}
11501208
return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
11511209
}

0 commit comments

Comments
 (0)