Skip to content

Commit cb68022

Browse files
committed
[RISCV][CostModel] Updates reduction and shuffle cost
- Make the cost of VMV_S_* and VMV_*_S independent of LMUL. - Use getRISCVInstructionCost() to compute reduction costs. - Add a SplitCost for types that need an LMUL larger than 8. For example, the cost of vredsum on [vscale x 16 x i64] is the cost of vadd on [vscale x 8 x i64] plus the cost of vredsum on [vscale x 8 x i64].
1 parent 0e7199c commit cb68022

17 files changed

+764
-695
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 107 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
4646
InstructionCost Cost = 0;
4747
for (auto Op : OpCodes) {
4848
switch (Op) {
49+
case RISCV::SLT:
50+
Cost += 1;
51+
break;
4952
case RISCV::VRGATHER_VI:
5053
Cost += TLI->getVRGatherVICost(VT);
5154
break;
@@ -84,8 +87,14 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
8487
Cost += VL;
8588
break;
8689
}
90+
case RISCV::VMV_X_S:
91+
case RISCV::VFMV_F_S:
92+
Cost += 1;
93+
break;
8794
case RISCV::VMV_S_X:
88-
// FIXME: VMV_S_X doesn't use LMUL, the cost should be 1
95+
case RISCV::VFMV_S_F:
96+
Cost += 1;
97+
break;
8998
default:
9099
Cost += LMULCost;
91100
}
@@ -444,9 +453,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
444453
// vmv.s.x v0, a0
445454
// vmerge.vvm v8, v9, v8, v0
446455
return LT.first *
447-
(TLI->getLMULCost(LT.second) + // FIXME: should be 1 for li
448-
getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
449-
LT.second, CostKind));
456+
(1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
457+
LT.second, CostKind));
450458
}
451459
case TTI::SK_Broadcast: {
452460
bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
@@ -459,9 +467,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
459467
// vmv.v.x v8, a0
460468
// vmsne.vi v0, v8, 0
461469
return LT.first *
462-
(TLI->getLMULCost(LT.second) + // FIXME: should be 1 for andi
463-
getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
464-
LT.second, CostKind));
470+
(1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
471+
LT.second, CostKind));
465472
}
466473
// Example sequence:
467474
// vsetivli zero, 2, e8, mf8, ta, mu (ignored)
@@ -473,12 +480,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
473480
// vmsne.vi v0, v8, 0
474481

475482
return LT.first *
476-
(TLI->getLMULCost(LT.second) + // FIXME: this should be 1 for andi
477-
TLI->getLMULCost(
478-
LT.second) + // FIXME: vmv.x.s is the same as extractelement
479-
getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
480-
RISCV::VMV_V_X, RISCV::VMSNE_VI},
481-
LT.second, CostKind));
483+
(1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
484+
RISCV::VMV_X_S, RISCV::VMV_V_X,
485+
RISCV::VMSNE_VI},
486+
LT.second, CostKind));
482487
}
483488

484489
if (HasScalar) {
@@ -523,9 +528,9 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
523528
if (LT.second.isFixedLengthVector())
524529
// vrsub.vi has a 5 bit immediate field, otherwise an li suffices
525530
LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
526-
// FIXME: replace the constant `2` below with cost of {VID_V,VRSUB_VX}
527-
InstructionCost GatherCost =
528-
2 + getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
531+
InstructionCost GatherCost = getRISCVInstructionCost(
532+
{RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV}, LT.second,
533+
CostKind);
529534
// Mask operation additionally required extend and truncate
530535
InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
531536
return LT.first * (LenCost + GatherCost + ExtendCost);
@@ -1358,19 +1363,53 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
13581363
return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
13591364

13601365
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
1361-
if (Ty->getElementType()->isIntegerTy(1))
1362-
// vcpop sequences, see vreduction-mask.ll. umax, smin actually only
1363-
// cost 2, but we don't have enough info here so we slightly over cost.
1364-
return (LT.first - 1) + 3;
1366+
std::array<unsigned, 3> Opcodes;
1367+
if (Ty->getElementType()->isIntegerTy(1)) {
1368+
// vcpop sequences, see vreduction-mask.ll.
1369+
if ((IID == Intrinsic::umax) || (IID == Intrinsic::smin))
1370+
Opcodes = {RISCV::VMNAND_MM, RISCV::VCPOP_M, RISCV::SLT};
1371+
else
1372+
Opcodes = {RISCV::VCPOP_M, RISCV::SLT};
1373+
return (LT.first - 1) +
1374+
getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1375+
}
13651376

13661377
// An IR reduction is composed of two vmv instructions and one RVV reduction instruction.
1367-
InstructionCost BaseCost = 2;
1368-
1369-
if (CostKind == TTI::TCK_CodeSize)
1370-
return (LT.first - 1) + BaseCost;
1371-
1372-
unsigned VL = getEstimatedVLFor(Ty);
1373-
return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
1378+
unsigned SplitOp;
1379+
switch (IID) {
1380+
default:
1381+
llvm_unreachable("Unsupported intrinsic");
1382+
case Intrinsic::smax:
1383+
SplitOp = RISCV::VMAX_VV;
1384+
Opcodes = {RISCV::VMV_S_X, RISCV::VREDMAX_VS, RISCV::VMV_X_S};
1385+
break;
1386+
case Intrinsic::smin:
1387+
SplitOp = RISCV::VMIN_VV;
1388+
Opcodes = {RISCV::VMV_S_X, RISCV::VREDMIN_VS, RISCV::VMV_X_S};
1389+
break;
1390+
case Intrinsic::umax:
1391+
SplitOp = RISCV::VMAXU_VV;
1392+
Opcodes = {RISCV::VMV_S_X, RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
1393+
break;
1394+
case Intrinsic::umin:
1395+
SplitOp = RISCV::VMINU_VV;
1396+
Opcodes = {RISCV::VMV_S_X, RISCV::VREDMINU_VS, RISCV::VMV_X_S};
1397+
break;
1398+
case Intrinsic::maxnum:
1399+
SplitOp = RISCV::VFMAX_VV;
1400+
Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1401+
break;
1402+
case Intrinsic::minnum:
1403+
SplitOp = RISCV::VFMIN_VV;
1404+
Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1405+
break;
1406+
}
1407+
// Add a cost for data larger than LMUL8
1408+
InstructionCost SplitCost =
1409+
(LT.first > 1) ? (LT.first - 1) *
1410+
getRISCVInstructionCost(SplitOp, LT.second, CostKind)
1411+
: 0;
1412+
return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
13741413
}
13751414

13761415
InstructionCost
@@ -1392,20 +1431,50 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
13921431
return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
13931432

13941433
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
1395-
if (Ty->getElementType()->isIntegerTy(1))
1434+
std::array<unsigned, 3> Opcodes;
1435+
if (Ty->getElementType()->isIntegerTy(1)) {
13961436
// vcpop sequences, see vreduction-mask.ll
1397-
return (LT.first - 1) + (ISD == ISD::AND ? 3 : 2);
1437+
if (ISD == ISD::AND)
1438+
Opcodes = {RISCV::VMNAND_MM, RISCV::VCPOP_M, RISCV::SLT};
1439+
else
1440+
Opcodes = {RISCV::VCPOP_M, RISCV::SLT};
1441+
return (LT.first - 1) +
1442+
getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1443+
}
13981444

13991445
// An IR reduction is composed of two vmv instructions and one RVV reduction instruction.
1400-
InstructionCost BaseCost = 2;
1401-
1402-
if (CostKind == TTI::TCK_CodeSize)
1403-
return (LT.first - 1) + BaseCost;
1404-
1405-
unsigned VL = getEstimatedVLFor(Ty);
1406-
if (TTI::requiresOrderedReduction(FMF))
1407-
return (LT.first - 1) + BaseCost + VL;
1408-
return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
1446+
unsigned SplitOp;
1447+
switch (ISD) {
1448+
case ISD::ADD:
1449+
SplitOp = RISCV::VADD_VV;
1450+
Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
1451+
break;
1452+
case ISD::OR:
1453+
SplitOp = RISCV::VOR_VV;
1454+
Opcodes = {RISCV::VMV_S_X, RISCV::VREDOR_VS, RISCV::VMV_X_S};
1455+
break;
1456+
case ISD::XOR:
1457+
SplitOp = RISCV::VXOR_VV;
1458+
Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
1459+
break;
1460+
case ISD::AND:
1461+
SplitOp = RISCV::VAND_VV;
1462+
Opcodes = {RISCV::VMV_S_X, RISCV::VREDAND_VS, RISCV::VMV_X_S};
1463+
break;
1464+
case ISD::FADD:
1465+
SplitOp = RISCV::VFADD_VV;
1466+
if (TTI::requiresOrderedReduction(FMF))
1467+
Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDOSUM_VS, RISCV::VFMV_F_S};
1468+
else
1469+
Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
1470+
break;
1471+
}
1472+
// Add a cost for data larger than LMUL8
1473+
InstructionCost SplitCost =
1474+
(LT.first > 1) ? (LT.first - 1) *
1475+
getRISCVInstructionCost(SplitOp, LT.second, CostKind)
1476+
: 0;
1477+
return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
14091478
}
14101479

14111480
InstructionCost RISCVTTIImpl::getExtendedReductionCost(

0 commit comments

Comments
 (0)