@@ -599,37 +599,44 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
599
599
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned > Indices,
600
600
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
601
601
bool UseMaskForCond, bool UseMaskForGaps) {
602
- if (isa<ScalableVectorType>(VecTy))
602
+ if (isa<ScalableVectorType>(VecTy) && Factor != 2 )
603
603
return InstructionCost::getInvalid ();
604
- auto *FVTy = cast<FixedVectorType>(VecTy);
605
- InstructionCost MemCost =
606
- getMemoryOpCost (Opcode, VecTy, Alignment, AddressSpace, CostKind);
607
- unsigned VF = FVTy->getNumElements () / Factor;
608
604
609
605
// The interleaved memory access pass will lower interleaved memory ops (i.e
610
606
// a load and store followed by a specific shuffle) to vlseg/vsseg
611
607
// intrinsics. In those cases then we can treat it as if it's just one (legal)
612
608
// memory op
613
609
if (!UseMaskForCond && !UseMaskForGaps &&
614
610
Factor <= TLI->getMaxSupportedInterleaveFactor ()) {
615
- std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost (FVTy);
611
+ auto *VTy = cast<VectorType>(VecTy);
612
+ std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost (VTy);
616
613
// Need to make sure type hasn't been scalarized
617
- if (LT.second .isFixedLengthVector ()) {
618
- auto *LegalFVTy = FixedVectorType ::get (FVTy ->getElementType (),
619
- LT.second .getVectorNumElements ());
614
+ if (LT.second .isVector ()) {
615
+ auto *LegalVTy = VectorType ::get (VTy ->getElementType (),
616
+ LT.second .getVectorElementCount ());
620
617
// FIXME: We use the memory op cost of the *legalized* type here, because
621
618
// it's getMemoryOpCost returns a really expensive cost for types like
622
619
// <6 x i8>, which show up when doing interleaves of Factor=3 etc.
623
620
// Should the memory op cost of these be cheaper?
624
- if (TLI->isLegalInterleavedAccessType (LegalFVTy , Factor, Alignment,
621
+ if (TLI->isLegalInterleavedAccessType (LegalVTy , Factor, Alignment,
625
622
AddressSpace, DL)) {
626
623
InstructionCost LegalMemCost = getMemoryOpCost (
627
- Opcode, LegalFVTy , Alignment, AddressSpace, CostKind);
624
+ Opcode, LegalVTy , Alignment, AddressSpace, CostKind);
628
625
return LT.first + LegalMemCost;
629
626
}
630
627
}
631
628
}
632
629
630
+ // TODO: Return the cost of interleaved accesses for scalable vectors when
631
+ // unable to convert to segment access instructions.
632
+ if (isa<ScalableVectorType>(VecTy))
633
+ return InstructionCost::getInvalid ();
634
+
635
+ auto *FVTy = cast<FixedVectorType>(VecTy);
636
+ InstructionCost MemCost =
637
+ getMemoryOpCost (Opcode, VecTy, Alignment, AddressSpace, CostKind);
638
+ unsigned VF = FVTy->getNumElements () / Factor;
639
+
633
640
// An interleaved load will look like this for Factor=3:
634
641
// %wide.vec = load <12 x i32>, ptr %3, align 4
635
642
// %strided.vec = shufflevector %wide.vec, poison, <4 x i32> <stride mask>
0 commit comments