Skip to content

Commit 3f1fef3

Browse files
authored
[RISCV] Support interleaved accesses for scalable vector. (#90583)
Support for interleaved accesses on scalable vectors with a factor of 2 is now enabled in the vectorizer. Therefore, this patch removes the restriction on scalable vectors with a factor of 2.
1 parent bd909d2 commit 3f1fef3

File tree

3 files changed

+1268
-322
lines changed

3 files changed

+1268
-322
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -599,37 +599,44 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
599599
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
600600
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
601601
bool UseMaskForCond, bool UseMaskForGaps) {
602-
if (isa<ScalableVectorType>(VecTy))
602+
if (isa<ScalableVectorType>(VecTy) && Factor != 2)
603603
return InstructionCost::getInvalid();
604-
auto *FVTy = cast<FixedVectorType>(VecTy);
605-
InstructionCost MemCost =
606-
getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, CostKind);
607-
unsigned VF = FVTy->getNumElements() / Factor;
608604

609605
// The interleaved memory access pass will lower interleaved memory ops (i.e
610606
// a load and store followed by a specific shuffle) to vlseg/vsseg
611607
// intrinsics. In those cases then we can treat it as if it's just one (legal)
612608
// memory op
613609
if (!UseMaskForCond && !UseMaskForGaps &&
614610
Factor <= TLI->getMaxSupportedInterleaveFactor()) {
615-
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(FVTy);
611+
auto *VTy = cast<VectorType>(VecTy);
612+
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VTy);
616613
// Need to make sure the type hasn't been scalarized
617-
if (LT.second.isFixedLengthVector()) {
618-
auto *LegalFVTy = FixedVectorType::get(FVTy->getElementType(),
619-
LT.second.getVectorNumElements());
614+
if (LT.second.isVector()) {
615+
auto *LegalVTy = VectorType::get(VTy->getElementType(),
616+
LT.second.getVectorElementCount());
620617
// FIXME: We use the memory op cost of the *legalized* type here, because
621618
// getMemoryOpCost returns a really expensive cost for types like
622619
// <6 x i8>, which show up when doing interleaves of Factor=3 etc.
623620
// Should the memory op cost of these be cheaper?
624-
if (TLI->isLegalInterleavedAccessType(LegalFVTy, Factor, Alignment,
621+
if (TLI->isLegalInterleavedAccessType(LegalVTy, Factor, Alignment,
625622
AddressSpace, DL)) {
626623
InstructionCost LegalMemCost = getMemoryOpCost(
627-
Opcode, LegalFVTy, Alignment, AddressSpace, CostKind);
624+
Opcode, LegalVTy, Alignment, AddressSpace, CostKind);
628625
return LT.first + LegalMemCost;
629626
}
630627
}
631628
}
632629

630+
// TODO: Return the cost of interleaved accesses for scalable vector when
631+
// unable to convert them to segment access instructions.
632+
if (isa<ScalableVectorType>(VecTy))
633+
return InstructionCost::getInvalid();
634+
635+
auto *FVTy = cast<FixedVectorType>(VecTy);
636+
InstructionCost MemCost =
637+
getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, CostKind);
638+
unsigned VF = FVTy->getNumElements() / Factor;
639+
633640
// An interleaved load will look like this for Factor=3:
634641
// %wide.vec = load <12 x i32>, ptr %3, align 4
635642
// %strided.vec = shufflevector %wide.vec, poison, <4 x i32> <stride mask>

0 commit comments

Comments
 (0)