Skip to content

Commit 2f69919

Browse files
committed
Reland "[LV]: Teach LV to recursively (de)interleave."
This commit relands the changes from "[LV]: Teach LV to recursively (de)interleave." (#122989). Reason for the revert: the patch exposed an assertion in the vectorizer about a VF difference between the legacy cost model and the VPlan-based cost model, caused by the uncalculated cost of a VPInstruction that VPlanTransforms creates as a replacement for an 'or disjoint' instruction. The fix: skip comparing the legacy cost model to the VPlan-based cost model, at least when the interleaving factor is greater than 2.
1 parent f0d05b0 commit 2f69919

File tree

6 files changed

+1387
-671
lines changed

6 files changed

+1387
-671
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3400,10 +3400,10 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
34003400
if (hasIrregularType(ScalarTy, DL))
34013401
return false;
34023402

3403-
// We currently only know how to emit interleave/deinterleave with
3404-
// Factor=2 for scalable vectors. This is purely an implementation
3405-
// limit.
3406-
if (VF.isScalable() && InterleaveFactor != 2)
3403+
// For scalable vectors, only power-of-2 interleave factors are currently
3404+
// supported, since we require the (de)interleave2 intrinsics instead of
3405+
// shufflevectors.
3406+
if (VF.isScalable() && !isPowerOf2_32(InterleaveFactor))
34073407
return false;
34083408

34093409
// If the group involves a non-integral pointer, we may not be able to
@@ -9279,9 +9279,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
92799279
CM.getWideningDecision(IG->getInsertPos(), VF) ==
92809280
LoopVectorizationCostModel::CM_Interleave);
92819281
// For scalable vectors, the only interleave factor currently supported
9282-
// is 2 since we require the (de)interleave2 intrinsics instead of
9283-
// shufflevectors.
9284-
assert((!Result || !VF.isScalable() || IG->getFactor() == 2) &&
9282+
// must be power of 2 since we require the (de)interleave2 intrinsics
9283+
// instead of shufflevectors.
9284+
assert((!Result || !VF.isScalable() || isPowerOf2_32(IG->getFactor())) &&
92859285
"Unsupported interleave factor for scalable vectors");
92869286
return Result;
92879287
};

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 56 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2868,10 +2868,21 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
28682868
// Scalable vectors cannot use arbitrary shufflevectors (only splats), so
28692869
// must use intrinsics to interleave.
28702870
if (VecTy->isScalableTy()) {
2871-
VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy);
2872-
return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
2873-
Vals,
2874-
/*FMFSource=*/nullptr, Name);
2871+
assert(isPowerOf2_32(Factor) && "Unsupported interleave factor for "
2872+
"scalable vectors, must be power of 2");
2873+
SmallVector<Value *> InterleavingValues(Vals);
2874+
// When interleaving, the number of values will be shrunk until we have the
2875+
// single final interleaved value.
2876+
auto *InterleaveTy = cast<VectorType>(InterleavingValues[0]->getType());
2877+
for (unsigned Midpoint = Factor / 2; Midpoint > 0; Midpoint /= 2) {
2878+
InterleaveTy = VectorType::getDoubleElementsVectorType(InterleaveTy);
2879+
for (unsigned I = 0; I < Midpoint; ++I)
2880+
InterleavingValues[I] = Builder.CreateIntrinsic(
2881+
InterleaveTy, Intrinsic::vector_interleave2,
2882+
{InterleavingValues[I], InterleavingValues[Midpoint + I]},
2883+
/*FMFSource=*/nullptr, Name);
2884+
}
2885+
return InterleavingValues[0];
28752886
}
28762887

28772888
// Fixed length. Start by concatenating all vectors into a wide vector.
@@ -2957,15 +2968,11 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
29572968
&InterleaveFactor](Value *MaskForGaps) -> Value * {
29582969
if (State.VF.isScalable()) {
29592970
assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
2960-
assert(InterleaveFactor == 2 &&
2971+
assert(isPowerOf2_32(InterleaveFactor) &&
29612972
"Unsupported deinterleave factor for scalable vectors");
29622973
auto *ResBlockInMask = State.get(BlockInMask);
2963-
SmallVector<Value *, 2> Ops = {ResBlockInMask, ResBlockInMask};
2964-
auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
2965-
State.VF.getKnownMinValue() * 2, true);
2966-
return State.Builder.CreateIntrinsic(
2967-
MaskTy, Intrinsic::vector_interleave2, Ops,
2968-
/*FMFSource=*/nullptr, "interleaved.mask");
2974+
SmallVector<Value *> Ops(InterleaveFactor, ResBlockInMask);
2975+
return interleaveVectors(State.Builder, Ops, "interleaved.mask");
29692976
}
29702977

29712978
if (!BlockInMask)
@@ -3005,22 +3012,48 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
30053012
ArrayRef<VPValue *> VPDefs = definedValues();
30063013
const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
30073014
if (VecTy->isScalableTy()) {
3008-
assert(InterleaveFactor == 2 &&
3015+
assert(isPowerOf2_32(InterleaveFactor) &&
30093016
"Unsupported deinterleave factor for scalable vectors");
30103017

3011-
// Scalable vectors cannot use arbitrary shufflevectors (only splats),
3012-
// so must use intrinsics to deinterleave.
3013-
Value *DI = State.Builder.CreateIntrinsic(
3014-
Intrinsic::vector_deinterleave2, VecTy, NewLoad,
3015-
/*FMFSource=*/nullptr, "strided.vec");
3016-
unsigned J = 0;
3017-
for (unsigned I = 0; I < InterleaveFactor; ++I) {
3018-
Instruction *Member = Group->getMember(I);
3018+
// Scalable vectors cannot use arbitrary shufflevectors (only splats),
3019+
// so must use intrinsics to deinterleave.
3020+
SmallVector<Value *> DeinterleavedValues(InterleaveFactor);
3021+
DeinterleavedValues[0] = NewLoad;
3022+
// For InterleaveFactor > 2 we have to deinterleave recursively, because
3023+
// the currently available deinterleave intrinsic only supports a factor
3024+
// of 2. For InterleaveFactor == 2 the loop below exits after its first
3025+
// iteration.
3026+
// When deinterleaving, the number of values will double until we
3027+
// have "InterleaveFactor".
3028+
for (unsigned NumVectors = 1; NumVectors < InterleaveFactor;
3029+
NumVectors *= 2) {
3030+
// Deinterleave the elements within the vector
3031+
SmallVector<Value *> TempDeinterleavedValues(NumVectors);
3032+
for (unsigned I = 0; I < NumVectors; ++I) {
3033+
auto *DiTy = DeinterleavedValues[I]->getType();
3034+
TempDeinterleavedValues[I] = State.Builder.CreateIntrinsic(
3035+
Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
3036+
/*FMFSource=*/nullptr, "strided.vec");
3037+
}
3038+
// Extract the deinterleaved values:
3039+
for (unsigned I = 0; I < 2; ++I)
3040+
for (unsigned J = 0; J < NumVectors; ++J)
3041+
DeinterleavedValues[NumVectors * I + J] =
3042+
State.Builder.CreateExtractValue(TempDeinterleavedValues[J], I);
3043+
}
30193044

3020-
if (!Member)
3045+
#ifndef NDEBUG
3046+
for (Value *Val : DeinterleavedValues)
3047+
assert(Val && "NULL Deinterleaved Value");
3048+
#endif
3049+
for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
3050+
Instruction *Member = Group->getMember(I);
3051+
Value *StridedVec = DeinterleavedValues[I];
3052+
if (!Member) {
3053+
// This value is not needed as it's not used
3054+
cast<Instruction>(StridedVec)->eraseFromParent();
30213055
continue;
3022-
3023-
Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
3056+
}
30243057
// If this member has different type, cast the result type.
30253058
if (Member->getType() != ScalarTy) {
30263059
VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);

0 commit comments

Comments
 (0)