@@ -1505,6 +1505,31 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
1505
1505
if (!isTypeLegal (Val))
1506
1506
return BaseT::getVectorInstrCost (Opcode, Val, CostKind, Index, Op0, Op1);
1507
1507
1508
+ // A mask (i1) vector extract/insert is costed via the equivalent i8 (e8) vector.
1509
+ if (Val->getScalarSizeInBits () == 1 ) {
1510
+ VectorType *WideTy =
1511
+ VectorType::get (IntegerType::get (Val->getContext (), 8 ),
1512
+ cast<VectorType>(Val)->getElementCount ());
1513
+ if (Opcode == Instruction::ExtractElement) {
1514
+ InstructionCost ExtendCost
1515
+ = getCastInstrCost (Instruction::ZExt, WideTy, Val,
1516
+ TTI::CastContextHint::None, CostKind);
1517
+ InstructionCost ExtractCost
1518
+ = getVectorInstrCost (Opcode, WideTy, CostKind, Index, nullptr , nullptr );
1519
+ return ExtendCost + ExtractCost;
1520
+ }
1521
+ InstructionCost ExtendCost
1522
+ = getCastInstrCost (Instruction::ZExt, WideTy, Val,
1523
+ TTI::CastContextHint::None, CostKind);
1524
+ InstructionCost InsertCost
1525
+ = getVectorInstrCost (Opcode, WideTy, CostKind, Index, nullptr , nullptr );
1526
+ InstructionCost TruncCost
1527
+ = getCastInstrCost (Instruction::Trunc, Val, WideTy,
1528
+ TTI::CastContextHint::None, CostKind);
1529
+ return ExtendCost + InsertCost + TruncCost;
1530
+ }
1531
+
1532
+
1508
1533
// In RVV, we could use vslidedown + vmv.x.s to extract an element from a vector
1509
1534
// and vslideup + vmv.s.x to insert an element into a vector.
1510
1535
unsigned BaseCost = 1 ;
@@ -1526,30 +1551,6 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
1526
1551
SlideCost = 1 ; // With a constant index, we do not need to use addi.
1527
1552
}
1528
1553
1529
- // Mask vector extract/insert element is different from normal case.
1530
- if (Val->getScalarSizeInBits () == 1 ) {
1531
- // For extractelement, we need the following instructions:
1532
- // vmv.v.i v8, 0
1533
- // vmerge.vim v8, v8, 1, v0
1534
- // vsetivli zero, 1, e8, m2, ta, mu (not count)
1535
- // vslidedown.vx v8, v8, a0
1536
- // vmv.x.s a0, v8
1537
-
1538
- // For insertelement, we need the following instructions:
1539
- // vsetvli a2, zero, e8, m1, ta, mu (not count)
1540
- // vmv.s.x v8, a0
1541
- // vmv.v.i v9, 0
1542
- // vmerge.vim v9, v9, 1, v0
1543
- // addi a0, a1, 1
1544
- // vsetvli zero, a0, e8, m1, tu, mu (not count)
1545
- // vslideup.vx v9, v8, a1
1546
- // vsetvli a0, zero, e8, m1, ta, mu (not count)
1547
- // vand.vi v8, v9, 1
1548
- // vmsne.vi v0, v8, 0
1549
-
1550
- // TODO: should we count these special vsetvlis?
1551
- BaseCost = Opcode == Instruction::InsertElement ? 5 : 3 ;
1552
- }
1553
1554
// Extracting an i64 on a target that has XLEN=32 needs more instructions.
1554
1555
if (Val->getScalarType ()->isIntegerTy () &&
1555
1556
ST->getXLen () < Val->getScalarSizeInBits ()) {
0 commit comments