Skip to content

Commit 9491f75

Browse files
authored
Reland: [LV]: Teach LV to recursively (de)interleave. (#122989)
This commit relands the changes from "[LV]: Teach LV to recursively (de)interleave. #89018". Reason for revert: the patch exposed a bug in the IA pass; that bug has since been fixed and landed by commit #122643.
1 parent e79bb87 commit 9491f75

File tree

6 files changed

+1387
-671
lines changed

6 files changed

+1387
-671
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3505,10 +3505,10 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
35053505
if (hasIrregularType(ScalarTy, DL))
35063506
return false;
35073507

3508-
// We currently only know how to emit interleave/deinterleave with
3509-
// Factor=2 for scalable vectors. This is purely an implementation
3510-
// limit.
3511-
if (VF.isScalable() && InterleaveFactor != 2)
3508+
// For scalable vectors, only power-of-2 interleave factors are currently
3509+
// supported, since we require the (de)interleave2 intrinsics instead of
3510+
// shufflevectors.
3511+
if (VF.isScalable() && !isPowerOf2_32(InterleaveFactor))
35123512
return false;
35133513

35143514
// If the group involves a non-integral pointer, we may not be able to
@@ -9435,9 +9435,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
94359435
CM.getWideningDecision(IG->getInsertPos(), VF) ==
94369436
LoopVectorizationCostModel::CM_Interleave);
94379437
// For scalable vectors, the only interleave factor currently supported
9438-
// is 2 since we require the (de)interleave2 intrinsics instead of
9439-
// shufflevectors.
9440-
assert((!Result || !VF.isScalable() || IG->getFactor() == 2) &&
9438+
// must be power of 2 since we require the (de)interleave2 intrinsics
9439+
// instead of shufflevectors.
9440+
assert((!Result || !VF.isScalable() || isPowerOf2_32(IG->getFactor())) &&
94419441
"Unsupported interleave factor for scalable vectors");
94429442
return Result;
94439443
};

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 56 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2863,10 +2863,21 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
28632863
// Scalable vectors cannot use arbitrary shufflevectors (only splats), so
28642864
// must use intrinsics to interleave.
28652865
if (VecTy->isScalableTy()) {
2866-
VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy);
2867-
return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
2868-
Vals,
2869-
/*FMFSource=*/nullptr, Name);
2866+
assert(isPowerOf2_32(Factor) && "Unsupported interleave factor for "
2867+
"scalable vectors, must be power of 2");
2868+
SmallVector<Value *> InterleavingValues(Vals);
2869+
// When interleaving, the number of values will be shrunk until we have the
2870+
// single final interleaved value.
2871+
auto *InterleaveTy = cast<VectorType>(InterleavingValues[0]->getType());
2872+
for (unsigned Midpoint = Factor / 2; Midpoint > 0; Midpoint /= 2) {
2873+
InterleaveTy = VectorType::getDoubleElementsVectorType(InterleaveTy);
2874+
for (unsigned I = 0; I < Midpoint; ++I)
2875+
InterleavingValues[I] = Builder.CreateIntrinsic(
2876+
InterleaveTy, Intrinsic::vector_interleave2,
2877+
{InterleavingValues[I], InterleavingValues[Midpoint + I]},
2878+
/*FMFSource=*/nullptr, Name);
2879+
}
2880+
return InterleavingValues[0];
28702881
}
28712882

28722883
// Fixed length. Start by concatenating all vectors into a wide vector.
@@ -2952,15 +2963,11 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
29522963
&InterleaveFactor](Value *MaskForGaps) -> Value * {
29532964
if (State.VF.isScalable()) {
29542965
assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
2955-
assert(InterleaveFactor == 2 &&
2966+
assert(isPowerOf2_32(InterleaveFactor) &&
29562967
"Unsupported deinterleave factor for scalable vectors");
29572968
auto *ResBlockInMask = State.get(BlockInMask);
2958-
SmallVector<Value *, 2> Ops = {ResBlockInMask, ResBlockInMask};
2959-
auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
2960-
State.VF.getKnownMinValue() * 2, true);
2961-
return State.Builder.CreateIntrinsic(
2962-
MaskTy, Intrinsic::vector_interleave2, Ops,
2963-
/*FMFSource=*/nullptr, "interleaved.mask");
2969+
SmallVector<Value *> Ops(InterleaveFactor, ResBlockInMask);
2970+
return interleaveVectors(State.Builder, Ops, "interleaved.mask");
29642971
}
29652972

29662973
if (!BlockInMask)
@@ -3000,22 +3007,48 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
30003007
ArrayRef<VPValue *> VPDefs = definedValues();
30013008
const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
30023009
if (VecTy->isScalableTy()) {
3003-
assert(InterleaveFactor == 2 &&
3010+
assert(isPowerOf2_32(InterleaveFactor) &&
30043011
"Unsupported deinterleave factor for scalable vectors");
30053012

3006-
// Scalable vectors cannot use arbitrary shufflevectors (only splats),
3007-
// so must use intrinsics to deinterleave.
3008-
Value *DI = State.Builder.CreateIntrinsic(
3009-
Intrinsic::vector_deinterleave2, VecTy, NewLoad,
3010-
/*FMFSource=*/nullptr, "strided.vec");
3011-
unsigned J = 0;
3012-
for (unsigned I = 0; I < InterleaveFactor; ++I) {
3013-
Instruction *Member = Group->getMember(I);
3013+
// Scalable vectors cannot use arbitrary shufflevectors (only splats),
3014+
// so must use intrinsics to deinterleave.
3015+
SmallVector<Value *> DeinterleavedValues(InterleaveFactor);
3016+
DeinterleavedValues[0] = NewLoad;
3017+
// For InterleaveFactor > 2 we have to deinterleave recursively, because
3018+
// the currently available deinterleave intrinsic only supports a factor
3019+
// of 2; for a factor of exactly 2 the loop exits after its first
3020+
// iteration.
3021+
// When deinterleaving, the number of values will double until we
3022+
// have "InterleaveFactor".
3023+
for (unsigned NumVectors = 1; NumVectors < InterleaveFactor;
3024+
NumVectors *= 2) {
3025+
// Deinterleave the elements within the vector
3026+
SmallVector<Value *> TempDeinterleavedValues(NumVectors);
3027+
for (unsigned I = 0; I < NumVectors; ++I) {
3028+
auto *DiTy = DeinterleavedValues[I]->getType();
3029+
TempDeinterleavedValues[I] = State.Builder.CreateIntrinsic(
3030+
Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
3031+
/*FMFSource=*/nullptr, "strided.vec");
3032+
}
3033+
// Extract the deinterleaved values:
3034+
for (unsigned I = 0; I < 2; ++I)
3035+
for (unsigned J = 0; J < NumVectors; ++J)
3036+
DeinterleavedValues[NumVectors * I + J] =
3037+
State.Builder.CreateExtractValue(TempDeinterleavedValues[J], I);
3038+
}
30143039

3015-
if (!Member)
3040+
#ifndef NDEBUG
3041+
for (Value *Val : DeinterleavedValues)
3042+
assert(Val && "NULL Deinterleaved Value");
3043+
#endif
3044+
for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
3045+
Instruction *Member = Group->getMember(I);
3046+
Value *StridedVec = DeinterleavedValues[I];
3047+
if (!Member) {
3048+
// This value is not needed as it's not used
3049+
cast<Instruction>(StridedVec)->eraseFromParent();
30163050
continue;
3017-
3018-
Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
3051+
}
30193052
// If this member has different type, cast the result type.
30203053
if (Member->getType() != ScalarTy) {
30213054
VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);

0 commit comments

Comments
 (0)