Skip to content

Commit abb0bf7

Browse files
committed
Remove recursive [de]interleaving code, and tighten assertions so Factor <= 8 for scalable VF
1 parent 181bdc3 commit abb0bf7

File tree

4 files changed

+25
-695
lines changed

4 files changed

+25
-695
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -3167,10 +3167,9 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
31673167
if (hasIrregularType(ScalarTy, DL))
31683168
return false;
31693169

3170-
// For scalable vectors, the interleave factors must be <= 8 or a power of 2
3171-
// since we require the (de)interleaveN intrinsics instead of shufflevectors.
3172-
if (VF.isScalable() &&
3173-
!(InterleaveFactor <= 8 || isPowerOf2_32(InterleaveFactor)))
3170+
// For scalable vectors, the interleave factors must be <= 8 since we require
3171+
// the (de)interleaveN intrinsics instead of shufflevectors.
3172+
if (VF.isScalable() && InterleaveFactor > 8)
31743173
return false;
31753174

31763175
// If the group involves a non-integral pointer, we may not be able to
@@ -8709,11 +8708,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
87098708
bool Result = (VF.isVector() && // Query is illegal for VF == 1
87108709
CM.getWideningDecision(IG->getInsertPos(), VF) ==
87118710
LoopVectorizationCostModel::CM_Interleave);
8712-
// For scalable vectors, the interleave factors must be <= 8 or a power of
8713-
// 2 since we require the (de)interleaveN intrinsics instead of
8714-
// shufflevectors.
8715-
assert((!Result || !VF.isScalable() ||
8716-
(IG->getFactor() <= 8 || isPowerOf2_32(IG->getFactor()))) &&
8711+
// For scalable vectors, the interleave factors must be <= 8 since we
8712+
// require the (de)interleaveN intrinsics instead of shufflevectors.
8713+
assert((!Result || !VF.isScalable() || IG->getFactor() <= 8) &&
87178714
"Unsupported interleave factor for scalable vectors");
87188715
return Result;
87198716
};

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 15 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -3294,29 +3294,13 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
32943294
// Scalable vectors cannot use arbitrary shufflevectors (only splats), so
32953295
// must use intrinsics to interleave.
32963296
if (VecTy->isScalableTy()) {
3297-
if (Factor <= 8) {
3298-
VectorType *InterleaveTy = VectorType::get(
3299-
VecTy->getElementType(),
3300-
VecTy->getElementCount().multiplyCoefficientBy(Factor));
3301-
return Builder.CreateIntrinsic(InterleaveTy,
3302-
getInterleaveIntrinsicID(Factor), Vals,
3303-
/*FMFSource=*/nullptr, Name);
3304-
}
3305-
assert(isPowerOf2_32(Factor) && "Unsupported interleave factor for "
3306-
"scalable vectors, must be power of 2");
3307-
SmallVector<Value *> InterleavingValues(Vals);
3308-
// When interleaving, the number of values will be shrunk until we have the
3309-
// single final interleaved value.
3310-
auto *InterleaveTy = cast<VectorType>(InterleavingValues[0]->getType());
3311-
for (unsigned Midpoint = Factor / 2; Midpoint > 0; Midpoint /= 2) {
3312-
InterleaveTy = VectorType::getDoubleElementsVectorType(InterleaveTy);
3313-
for (unsigned I = 0; I < Midpoint; ++I)
3314-
InterleavingValues[I] = Builder.CreateIntrinsic(
3315-
InterleaveTy, Intrinsic::vector_interleave2,
3316-
{InterleavingValues[I], InterleavingValues[Midpoint + I]},
3317-
/*FMFSource=*/nullptr, Name);
3318-
}
3319-
return InterleavingValues[0];
3297+
assert(Factor <= 8 && "Unsupported interelave factor for scalable vectors");
3298+
VectorType *InterleaveTy =
3299+
VectorType::get(VecTy->getElementType(),
3300+
VecTy->getElementCount().multiplyCoefficientBy(Factor));
3301+
return Builder.CreateIntrinsic(InterleaveTy,
3302+
getInterleaveIntrinsicID(Factor), Vals,
3303+
/*FMFSource=*/nullptr, Name);
33203304
}
33213305

33223306
// Fixed length. Start by concatenating all vectors into a wide vector.
@@ -3402,7 +3386,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
34023386
&InterleaveFactor](Value *MaskForGaps) -> Value * {
34033387
if (State.VF.isScalable()) {
34043388
assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
3405-
assert((InterleaveFactor <= 8 || isPowerOf2_32(InterleaveFactor)) &&
3389+
assert(InterleaveFactor <= 8 &&
34063390
"Unsupported deinterleave factor for scalable vectors");
34073391
auto *ResBlockInMask = State.get(BlockInMask);
34083392
SmallVector<Value *> Ops(InterleaveFactor, ResBlockInMask);
@@ -3448,51 +3432,16 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
34483432
if (VecTy->isScalableTy()) {
34493433
// Scalable vectors cannot use arbitrary shufflevectors (only splats),
34503434
// so must use intrinsics to deinterleave.
3451-
SmallVector<Value *> DeinterleavedValues(InterleaveFactor);
3452-
3453-
if (InterleaveFactor <= 8) {
3454-
Value *Deinterleave = State.Builder.CreateIntrinsic(
3455-
getDeinterleaveIntrinsicID(InterleaveFactor), NewLoad->getType(),
3456-
NewLoad,
3457-
/*FMFSource=*/nullptr, "strided.vec");
3458-
for (unsigned I = 0; I < InterleaveFactor; I++)
3459-
DeinterleavedValues[I] =
3460-
State.Builder.CreateExtractValue(Deinterleave, I);
3461-
} else {
3462-
// TODO: No in-tree target will reach this case. Should this be removed?
3463-
assert(isPowerOf2_32(InterleaveFactor) &&
3464-
"Unsupported deinterleave factor for scalable vectors");
3465-
DeinterleavedValues[0] = NewLoad;
3466-
// For InterleaveFactor > 8 we have to do recursive deinterleaving via
3467-
// deinterleave2, because the intrinsics only go up to Factor 8. We
3468-
// currently only support power-of-2 factors. When deinterleaving, the
3469-
// number of values will double until we have "InterleaveFactor".
3470-
// Deinterleave the elements within the vector
3471-
SmallVector<Value *> TempDeinterleavedValues(InterleaveFactor);
3472-
for (unsigned NumVectors = 1; NumVectors < InterleaveFactor;
3473-
NumVectors *= 2) {
3474-
for (unsigned I = 0; I < NumVectors; ++I) {
3475-
auto *DiTy = DeinterleavedValues[I]->getType();
3476-
TempDeinterleavedValues[I] = State.Builder.CreateIntrinsic(
3477-
Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
3478-
/*FMFSource=*/nullptr, "strided.vec");
3479-
}
3480-
// Extract the deinterleaved values:
3481-
for (unsigned I = 0; I < 2; ++I)
3482-
for (unsigned J = 0; J < NumVectors; ++J)
3483-
DeinterleavedValues[NumVectors * I + J] =
3484-
State.Builder.CreateExtractValue(TempDeinterleavedValues[J],
3485-
I);
3486-
}
3487-
}
3435+
assert(InterleaveFactor <= 8 &&
3436+
"Unsupported deinterleave factor for scalable vectors");
3437+
Value *Deinterleave = State.Builder.CreateIntrinsic(
3438+
getDeinterleaveIntrinsicID(InterleaveFactor), NewLoad->getType(),
3439+
NewLoad,
3440+
/*FMFSource=*/nullptr, "strided.vec");
34883441

3489-
#ifndef NDEBUG
3490-
for (Value *Val : DeinterleavedValues)
3491-
assert(Val && "NULL Deinterleaved Value");
3492-
#endif
34933442
for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
34943443
Instruction *Member = Group->getMember(I);
3495-
Value *StridedVec = DeinterleavedValues[I];
3444+
Value *StridedVec = State.Builder.CreateExtractValue(Deinterleave, I);
34963445
if (!Member) {
34973446
// This value is not needed as it's not used
34983447
cast<Instruction>(StridedVec)->eraseFromParent();

llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -375,8 +375,8 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n
375375
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP9]], align 4
376376
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
377377
; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
378-
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
379378
; CHECK-NEXT: [[REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP10]])
379+
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
380380
; CHECK-NEXT: [[REVERSE1:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP11]])
381381
; CHECK-NEXT: [[TMP12:%.*]] = add nsw <vscale x 4 x i32> [[REVERSE]], [[VEC_IND]]
382382
; CHECK-NEXT: [[TMP13:%.*]] = sub nsw <vscale x 4 x i32> [[REVERSE1]], [[VEC_IND]]
@@ -1587,12 +1587,12 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A,
15871587
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 16 x i32>, ptr [[TMP10]], align 4
15881588
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32> [[WIDE_VEC]])
15891589
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
1590-
; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
1591-
; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 2
1592-
; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 3
15931590
; CHECK-NEXT: [[REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP13]])
1591+
; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
15941592
; CHECK-NEXT: [[REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP14]])
1593+
; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 2
15951594
; CHECK-NEXT: [[REVERSE4:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP15]])
1595+
; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 3
15961596
; CHECK-NEXT: [[REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[TMP16]])
15971597
; CHECK-NEXT: [[TMP17:%.*]] = add nsw <vscale x 4 x i32> [[REVERSE]], [[VEC_IND]]
15981598
; CHECK-NEXT: [[TMP18:%.*]] = sub nsw <vscale x 4 x i32> [[REVERSE3]], [[VEC_IND]]

0 commit comments

Comments
 (0)