Skip to content

Commit 332d264

Browse files
committed
1 parent d5a96eb commit 332d264

File tree

6 files changed

+671
-1387
lines changed

6 files changed

+671
-1387
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3489,10 +3489,10 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
34893489
if (hasIrregularType(ScalarTy, DL))
34903490
return false;
34913491

3492-
// For scalable vectors, the only interleave factor currently supported
3493-
// must be power of 2 since we require the (de)interleave2 intrinsics
3494-
// instead of shufflevectors.
3495-
if (VF.isScalable() && !isPowerOf2_32(InterleaveFactor))
3492+
// We currently only know how to emit interleave/deinterleave with
3493+
// Factor=2 for scalable vectors. This is purely an implementation
3494+
// limit.
3495+
if (VF.isScalable() && InterleaveFactor != 2)
34963496
return false;
34973497

34983498
// If the group involves a non-integral pointer, we may not be able to
@@ -9193,9 +9193,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
91939193
CM.getWideningDecision(IG->getInsertPos(), VF) ==
91949194
LoopVectorizationCostModel::CM_Interleave);
91959195
// For scalable vectors, the only interleave factor currently supported
9196-
// must be power of 2 since we require the (de)interleave2 intrinsics
9197-
// instead of shufflevectors.
9198-
assert((!Result || !VF.isScalable() || isPowerOf2_32(IG->getFactor())) &&
9196+
// is 2 since we require the (de)interleave2 intrinsics instead of
9197+
// shufflevectors.
9198+
assert((!Result || !VF.isScalable() || IG->getFactor() == 2) &&
91999199
"Unsupported interleave factor for scalable vectors");
92009200
return Result;
92019201
};

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 23 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -2789,21 +2789,10 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
27892789
// Scalable vectors cannot use arbitrary shufflevectors (only splats), so
27902790
// must use intrinsics to interleave.
27912791
if (VecTy->isScalableTy()) {
2792-
assert(isPowerOf2_32(Factor) && "Unsupported interleave factor for "
2793-
"scalable vectors, must be power of 2");
2794-
SmallVector<Value *> InterleavingValues(Vals);
2795-
// When interleaving, the number of values will be shrunk until we have the
2796-
// single final interleaved value.
2797-
auto *InterleaveTy = cast<VectorType>(InterleavingValues[0]->getType());
2798-
for (unsigned Midpoint = Factor / 2; Midpoint > 0; Midpoint /= 2) {
2799-
InterleaveTy = VectorType::getDoubleElementsVectorType(InterleaveTy);
2800-
for (unsigned I = 0; I < Midpoint; ++I)
2801-
InterleavingValues[I] = Builder.CreateIntrinsic(
2802-
InterleaveTy, Intrinsic::vector_interleave2,
2803-
{InterleavingValues[I], InterleavingValues[Midpoint + I]},
2804-
/*FMFSource=*/nullptr, Name);
2805-
}
2806-
return InterleavingValues[0];
2792+
VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy);
2793+
return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
2794+
Vals,
2795+
/*FMFSource=*/nullptr, Name);
28072796
}
28082797

28092798
// Fixed length. Start by concatenating all vectors into a wide vector.
@@ -2889,11 +2878,15 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
28892878
&InterleaveFactor](Value *MaskForGaps) -> Value * {
28902879
if (State.VF.isScalable()) {
28912880
assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
2892-
assert(isPowerOf2_32(InterleaveFactor) &&
2881+
assert(InterleaveFactor == 2 &&
28932882
"Unsupported deinterleave factor for scalable vectors");
28942883
auto *ResBlockInMask = State.get(BlockInMask);
2895-
SmallVector<Value *> Ops(InterleaveFactor, ResBlockInMask);
2896-
return interleaveVectors(State.Builder, Ops, "interleaved.mask");
2884+
SmallVector<Value *, 2> Ops = {ResBlockInMask, ResBlockInMask};
2885+
auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
2886+
State.VF.getKnownMinValue() * 2, true);
2887+
return State.Builder.CreateIntrinsic(
2888+
MaskTy, Intrinsic::vector_interleave2, Ops,
2889+
/*FMFSource=*/nullptr, "interleaved.mask");
28972890
}
28982891

28992892
if (!BlockInMask)
@@ -2933,48 +2926,22 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
29332926
ArrayRef<VPValue *> VPDefs = definedValues();
29342927
const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
29352928
if (VecTy->isScalableTy()) {
2936-
assert(isPowerOf2_32(InterleaveFactor) &&
2929+
assert(InterleaveFactor == 2 &&
29372930
"Unsupported deinterleave factor for scalable vectors");
29382931

2939-
// Scalable vectors cannot use arbitrary shufflevectors (only splats),
2940-
// so must use intrinsics to deinterleave.
2941-
SmallVector<Value *> DeinterleavedValues(InterleaveFactor);
2942-
DeinterleavedValues[0] = NewLoad;
2943-
// For the case of InterleaveFactor > 2, we will have to do recursive
2944-
// deinterleaving, because the current available deinterleave intrinsic
2945-
// supports only Factor of 2, otherwise it will bailout after first
2946-
// iteration.
2947-
// When deinterleaving, the number of values will double until we
2948-
// have "InterleaveFactor".
2949-
for (unsigned NumVectors = 1; NumVectors < InterleaveFactor;
2950-
NumVectors *= 2) {
2951-
// Deinterleave the elements within the vector
2952-
SmallVector<Value *> TempDeinterleavedValues(NumVectors);
2953-
for (unsigned I = 0; I < NumVectors; ++I) {
2954-
auto *DiTy = DeinterleavedValues[I]->getType();
2955-
TempDeinterleavedValues[I] = State.Builder.CreateIntrinsic(
2956-
Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
2957-
/*FMFSource=*/nullptr, "strided.vec");
2958-
}
2959-
// Extract the deinterleaved values:
2960-
for (unsigned I = 0; I < 2; ++I)
2961-
for (unsigned J = 0; J < NumVectors; ++J)
2962-
DeinterleavedValues[NumVectors * I + J] =
2963-
State.Builder.CreateExtractValue(TempDeinterleavedValues[J], I);
2964-
}
2965-
2966-
#ifndef NDEBUG
2967-
for (Value *Val : DeinterleavedValues)
2968-
assert(Val && "NULL Deinterleaved Value");
2969-
#endif
2970-
for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
2932+
// Scalable vectors cannot use arbitrary shufflevectors (only splats),
2933+
// so must use intrinsics to deinterleave.
2934+
Value *DI = State.Builder.CreateIntrinsic(
2935+
Intrinsic::vector_deinterleave2, VecTy, NewLoad,
2936+
/*FMFSource=*/nullptr, "strided.vec");
2937+
unsigned J = 0;
2938+
for (unsigned I = 0; I < InterleaveFactor; ++I) {
29712939
Instruction *Member = Group->getMember(I);
2972-
Value *StridedVec = DeinterleavedValues[I];
2973-
if (!Member) {
2974-
// This value is not needed as it's not used
2975-
static_cast<Instruction *>(StridedVec)->eraseFromParent();
2940+
2941+
if (!Member)
29762942
continue;
2977-
}
2943+
2944+
Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
29782945
// If this member has different type, cast the result type.
29792946
if (Member->getType() != ScalarTy) {
29802947
VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);

0 commit comments

Comments
 (0)