@@ -46,6 +46,9 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
46
46
InstructionCost Cost = 0 ;
47
47
for (auto Op : OpCodes) {
48
48
switch (Op) {
49
+ case RISCV::SLT:
50
+ Cost += 1 ;
51
+ break ;
49
52
case RISCV::VRGATHER_VI:
50
53
Cost += TLI->getVRGatherVICost (VT);
51
54
break ;
@@ -84,8 +87,14 @@ RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
84
87
Cost += VL;
85
88
break ;
86
89
}
90
+ case RISCV::VMV_X_S:
91
+ case RISCV::VFMV_F_S:
92
+ Cost += 1 ;
93
+ break ;
87
94
case RISCV::VMV_S_X:
88
- // FIXME: VMV_S_X doesn't use LMUL, the cost should be 1
95
+ case RISCV::VFMV_S_F:
96
+ Cost += 1 ;
97
+ break ;
89
98
default :
90
99
Cost += LMULCost;
91
100
}
@@ -444,9 +453,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
444
453
// vmv.s.x v0, a0
445
454
// vmerge.vvm v8, v9, v8, v0
446
455
return LT.first *
447
- (TLI->getLMULCost (LT.second ) + // FIXME: should be 1 for li
448
- getRISCVInstructionCost ({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
449
- LT.second , CostKind));
456
+ (1 + getRISCVInstructionCost ({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
457
+ LT.second , CostKind));
450
458
}
451
459
case TTI::SK_Broadcast: {
452
460
bool HasScalar = (Args.size () > 0 ) && (Operator::getOpcode (Args[0 ]) ==
@@ -459,9 +467,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
459
467
// vmv.v.x v8, a0
460
468
// vmsne.vi v0, v8, 0
461
469
return LT.first *
462
- (TLI->getLMULCost (LT.second ) + // FIXME: should be 1 for andi
463
- getRISCVInstructionCost ({RISCV::VMV_V_X, RISCV::VMSNE_VI},
464
- LT.second , CostKind));
470
+ (1 + getRISCVInstructionCost ({RISCV::VMV_V_X, RISCV::VMSNE_VI},
471
+ LT.second , CostKind));
465
472
}
466
473
// Example sequence:
467
474
// vsetivli zero, 2, e8, mf8, ta, mu (ignored)
@@ -473,12 +480,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
473
480
// vmsne.vi v0, v8, 0
474
481
475
482
return LT.first *
476
- (TLI->getLMULCost (LT.second ) + // FIXME: this should be 1 for andi
477
- TLI->getLMULCost (
478
- LT.second ) + // FIXME: vmv.x.s is the same as extractelement
479
- getRISCVInstructionCost ({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
480
- RISCV::VMV_V_X, RISCV::VMSNE_VI},
481
- LT.second , CostKind));
483
+ (1 + getRISCVInstructionCost ({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
484
+ RISCV::VMV_X_S, RISCV::VMV_V_X,
485
+ RISCV::VMSNE_VI},
486
+ LT.second , CostKind));
482
487
}
483
488
484
489
if (HasScalar) {
@@ -523,9 +528,9 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
523
528
if (LT.second .isFixedLengthVector ())
524
529
// vrsub.vi has a 5 bit immediate field, otherwise an li suffices
525
530
LenCost = isInt<5 >(LT.second .getVectorNumElements () - 1 ) ? 0 : 1 ;
526
- // FIXME: replace the constant `2` below with cost of {VID_V,VRSUB_VX}
527
- InstructionCost GatherCost =
528
- 2 + getRISCVInstructionCost (RISCV::VRGATHER_VV, LT. second , CostKind);
531
+ InstructionCost GatherCost = getRISCVInstructionCost (
532
+ {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV}, LT. second ,
533
+ CostKind);
529
534
// Mask operations additionally require an extend and a truncate
530
535
InstructionCost ExtendCost = Tp->getElementType ()->isIntegerTy (1 ) ? 3 : 0 ;
531
536
return LT.first * (LenCost + GatherCost + ExtendCost);
@@ -1358,19 +1363,53 @@ RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
1358
1363
return BaseT::getMinMaxReductionCost (IID, Ty, FMF, CostKind);
1359
1364
1360
1365
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost (Ty);
1361
- if (Ty->getElementType ()->isIntegerTy (1 ))
1362
- // vcpop sequences, see vreduction-mask.ll. umax, smin actually only
1363
- // cost 2, but we don't have enough info here so we slightly over cost.
1364
- return (LT.first - 1 ) + 3 ;
1366
+ std::array<unsigned , 3 > Opcodes;
1367
+ if (Ty->getElementType ()->isIntegerTy (1 )) {
1368
+ // vcpop sequences, see vreduction-mask.ll.
1369
+ if ((IID == Intrinsic::umax) || (IID == Intrinsic::smin))
1370
+ Opcodes = {RISCV::VMNAND_MM, RISCV::VCPOP_M, RISCV::SLT};
1371
+ else
1372
+ Opcodes = {RISCV::VCPOP_M, RISCV::SLT};
1373
+ return (LT.first - 1 ) +
1374
+ getRISCVInstructionCost (Opcodes, LT.second , CostKind);
1375
+ }
1365
1376
1366
1377
// IR reduction is composed of two vmv and one RVV reduction instruction.
1367
- InstructionCost BaseCost = 2 ;
1368
-
1369
- if (CostKind == TTI::TCK_CodeSize)
1370
- return (LT.first - 1 ) + BaseCost;
1371
-
1372
- unsigned VL = getEstimatedVLFor (Ty);
1373
- return (LT.first - 1 ) + BaseCost + Log2_32_Ceil (VL);
1378
+ unsigned SplitOp;
1379
+ switch (IID) {
1380
+ default :
1381
+ llvm_unreachable (" Unsupported intrinsic" );
1382
+ case Intrinsic::smax:
1383
+ SplitOp = RISCV::VMAX_VV;
1384
+ Opcodes = {RISCV::VMV_S_X, RISCV::VREDMAX_VS, RISCV::VMV_X_S};
1385
+ break ;
1386
+ case Intrinsic::smin:
1387
+ SplitOp = RISCV::VMIN_VV;
1388
+ Opcodes = {RISCV::VMV_S_X, RISCV::VREDMIN_VS, RISCV::VMV_X_S};
1389
+ break ;
1390
+ case Intrinsic::umax:
1391
+ SplitOp = RISCV::VMAXU_VV;
1392
+ Opcodes = {RISCV::VMV_S_X, RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
1393
+ break ;
1394
+ case Intrinsic::umin:
1395
+ SplitOp = RISCV::VMINU_VV;
1396
+ Opcodes = {RISCV::VMV_S_X, RISCV::VREDMINU_VS, RISCV::VMV_X_S};
1397
+ break ;
1398
+ case Intrinsic::maxnum:
1399
+ SplitOp = RISCV::VFMAX_VV;
1400
+ Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1401
+ break ;
1402
+ case Intrinsic::minnum:
1403
+ SplitOp = RISCV::VFMIN_VV;
1404
+ Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1405
+ break ;
1406
+ }
1407
+ // Add a cost for data larger than LMUL8
1408
+ InstructionCost SplitCost =
1409
+ (LT.first > 1 ) ? (LT.first - 1 ) *
1410
+ getRISCVInstructionCost (SplitOp, LT.second , CostKind)
1411
+ : 0 ;
1412
+ return SplitCost + getRISCVInstructionCost (Opcodes, LT.second , CostKind);
1374
1413
}
1375
1414
1376
1415
InstructionCost
@@ -1392,20 +1431,50 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
1392
1431
return BaseT::getArithmeticReductionCost (Opcode, Ty, FMF, CostKind);
1393
1432
1394
1433
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost (Ty);
1395
- if (Ty->getElementType ()->isIntegerTy (1 ))
1434
+ std::array<unsigned , 3 > Opcodes;
1435
+ if (Ty->getElementType ()->isIntegerTy (1 )) {
1396
1436
// vcpop sequences, see vreduction-mask.ll
1397
- return (LT.first - 1 ) + (ISD == ISD::AND ? 3 : 2 );
1437
+ if (ISD == ISD::AND)
1438
+ Opcodes = {RISCV::VMNAND_MM, RISCV::VCPOP_M, RISCV::SLT};
1439
+ else
1440
+ Opcodes = {RISCV::VCPOP_M, RISCV::SLT};
1441
+ return (LT.first - 1 ) +
1442
+ getRISCVInstructionCost (Opcodes, LT.second , CostKind);
1443
+ }
1398
1444
1399
1445
// IR reduction is composed of two vmv and one RVV reduction instruction.
1400
- InstructionCost BaseCost = 2 ;
1401
-
1402
- if (CostKind == TTI::TCK_CodeSize)
1403
- return (LT.first - 1 ) + BaseCost;
1404
-
1405
- unsigned VL = getEstimatedVLFor (Ty);
1406
- if (TTI::requiresOrderedReduction (FMF))
1407
- return (LT.first - 1 ) + BaseCost + VL;
1408
- return (LT.first - 1 ) + BaseCost + Log2_32_Ceil (VL);
1446
+ unsigned SplitOp;
1447
+ switch (ISD) {
1448
+ case ISD::ADD:
1449
+ SplitOp = RISCV::VADD_VV;
1450
+ Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
1451
+ break ;
1452
+ case ISD::OR:
1453
+ SplitOp = RISCV::VOR_VV;
1454
+ Opcodes = {RISCV::VMV_S_X, RISCV::VREDOR_VS, RISCV::VMV_X_S};
1455
+ break ;
1456
+ case ISD::XOR:
1457
+ SplitOp = RISCV::VXOR_VV;
1458
+ Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
1459
+ break ;
1460
+ case ISD::AND:
1461
+ SplitOp = RISCV::VAND_VV;
1462
+ Opcodes = {RISCV::VMV_S_X, RISCV::VREDAND_VS, RISCV::VMV_X_S};
1463
+ break ;
1464
+ case ISD::FADD:
1465
+ SplitOp = RISCV::VFADD_VV;
1466
+ if (TTI::requiresOrderedReduction (FMF))
1467
+ Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDOSUM_VS, RISCV::VFMV_F_S};
1468
+ else
1469
+ Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
1470
+ break ;
1471
+ }
1472
+ // Add a cost for data larger than LMUL8
1473
+ InstructionCost SplitCost =
1474
+ (LT.first > 1 ) ? (LT.first - 1 ) *
1475
+ getRISCVInstructionCost (SplitOp, LT.second , CostKind)
1476
+ : 0 ;
1477
+ return SplitCost + getRISCVInstructionCost (Opcodes, LT.second , CostKind);
1409
1478
}
1410
1479
1411
1480
InstructionCost RISCVTTIImpl::getExtendedReductionCost (
0 commit comments