@@ -2849,10 +2849,21 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
2849
2849
// Scalable vectors cannot use arbitrary shufflevectors (only splats), so
2850
2850
// must use intrinsics to interleave.
2851
2851
if (VecTy->isScalableTy ()) {
2852
- VectorType *WideVecTy = VectorType::getDoubleElementsVectorType (VecTy);
2853
- return Builder.CreateIntrinsic (WideVecTy, Intrinsic::vector_interleave2,
2854
- Vals,
2855
- /* FMFSource=*/ nullptr , Name);
2852
+ assert (isPowerOf2_32 (Factor) && " Unsupported interleave factor for "
2853
+ " scalable vectors, must be power of 2" );
2854
+ SmallVector<Value *> InterleavingValues (Vals);
2855
+ // When interleaving, the number of values will be shrunk until we have the
2856
+ // single final interleaved value.
2857
+ auto *InterleaveTy = cast<VectorType>(InterleavingValues[0 ]->getType ());
2858
+ for (unsigned Midpoint = Factor / 2 ; Midpoint > 0 ; Midpoint /= 2 ) {
2859
+ InterleaveTy = VectorType::getDoubleElementsVectorType (InterleaveTy);
2860
+ for (unsigned I = 0 ; I < Midpoint; ++I)
2861
+ InterleavingValues[I] = Builder.CreateIntrinsic (
2862
+ InterleaveTy, Intrinsic::vector_interleave2,
2863
+ {InterleavingValues[I], InterleavingValues[Midpoint + I]},
2864
+ /* FMFSource=*/ nullptr , Name);
2865
+ }
2866
+ return InterleavingValues[0 ];
2856
2867
}
2857
2868
2858
2869
// Fixed length. Start by concatenating all vectors into a wide vector.
@@ -2938,15 +2949,11 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
2938
2949
&InterleaveFactor](Value *MaskForGaps) -> Value * {
2939
2950
if (State.VF .isScalable ()) {
2940
2951
assert (!MaskForGaps && " Interleaved groups with gaps are not supported." );
2941
- assert (InterleaveFactor == 2 &&
2952
+ assert (isPowerOf2_32 ( InterleaveFactor) &&
2942
2953
" Unsupported deinterleave factor for scalable vectors" );
2943
2954
auto *ResBlockInMask = State.get (BlockInMask);
2944
- SmallVector<Value *, 2 > Ops = {ResBlockInMask, ResBlockInMask};
2945
- auto *MaskTy = VectorType::get (State.Builder .getInt1Ty (),
2946
- State.VF .getKnownMinValue () * 2 , true );
2947
- return State.Builder .CreateIntrinsic (
2948
- MaskTy, Intrinsic::vector_interleave2, Ops,
2949
- /* FMFSource=*/ nullptr , " interleaved.mask" );
2955
+ SmallVector<Value *> Ops (InterleaveFactor, ResBlockInMask);
2956
+ return interleaveVectors (State.Builder , Ops, " interleaved.mask" );
2950
2957
}
2951
2958
2952
2959
if (!BlockInMask)
@@ -2986,22 +2993,48 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
2986
2993
ArrayRef<VPValue *> VPDefs = definedValues ();
2987
2994
const DataLayout &DL = State.CFG .PrevBB ->getDataLayout ();
2988
2995
if (VecTy->isScalableTy ()) {
2989
- assert (InterleaveFactor == 2 &&
2996
+ assert (isPowerOf2_32 ( InterleaveFactor) &&
2990
2997
" Unsupported deinterleave factor for scalable vectors" );
2991
2998
2992
- // Scalable vectors cannot use arbitrary shufflevectors (only splats),
2993
- // so must use intrinsics to deinterleave.
2994
- Value *DI = State.Builder .CreateIntrinsic (
2995
- Intrinsic::vector_deinterleave2, VecTy, NewLoad,
2996
- /* FMFSource=*/ nullptr , " strided.vec" );
2997
- unsigned J = 0 ;
2998
- for (unsigned I = 0 ; I < InterleaveFactor; ++I) {
2999
- Instruction *Member = Group->getMember (I);
2999
+ // Scalable vectors cannot use arbitrary shufflevectors (only splats),
3000
+ // so must use intrinsics to deinterleave.
3001
+ SmallVector<Value *> DeinterleavedValues (InterleaveFactor);
3002
+ DeinterleavedValues[0 ] = NewLoad;
3003
+ // For the case of InterleaveFactor > 2, we will have to do recursive
3004
+ // deinterleaving, because the current available deinterleave intrinsic
3005
+ // supports only Factor of 2, otherwise it will bailout after first
3006
+ // iteration.
3007
+ // When deinterleaving, the number of values will double until we
3008
+ // have "InterleaveFactor".
3009
+ for (unsigned NumVectors = 1 ; NumVectors < InterleaveFactor;
3010
+ NumVectors *= 2 ) {
3011
+ // Deinterleave the elements within the vector
3012
+ SmallVector<Value *> TempDeinterleavedValues (NumVectors);
3013
+ for (unsigned I = 0 ; I < NumVectors; ++I) {
3014
+ auto *DiTy = DeinterleavedValues[I]->getType ();
3015
+ TempDeinterleavedValues[I] = State.Builder .CreateIntrinsic (
3016
+ Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
3017
+ /* FMFSource=*/ nullptr , " strided.vec" );
3018
+ }
3019
+ // Extract the deinterleaved values:
3020
+ for (unsigned I = 0 ; I < 2 ; ++I)
3021
+ for (unsigned J = 0 ; J < NumVectors; ++J)
3022
+ DeinterleavedValues[NumVectors * I + J] =
3023
+ State.Builder .CreateExtractValue (TempDeinterleavedValues[J], I);
3024
+ }
3000
3025
3001
- if (!Member)
3026
+ #ifndef NDEBUG
3027
+ for (Value *Val : DeinterleavedValues)
3028
+ assert (Val && " NULL Deinterleaved Value" );
3029
+ #endif
3030
+ for (unsigned I = 0 , J = 0 ; I < InterleaveFactor; ++I) {
3031
+ Instruction *Member = Group->getMember (I);
3032
+ Value *StridedVec = DeinterleavedValues[I];
3033
+ if (!Member) {
3034
+ // This value is not needed as it's not used
3035
+ static_cast <Instruction *>(StridedVec)->eraseFromParent ();
3002
3036
continue ;
3003
-
3004
- Value *StridedVec = State.Builder .CreateExtractValue (DI, I);
3037
+ }
3005
3038
// If this member has different type, cast the result type.
3006
3039
if (Member->getType () != ScalarTy) {
3007
3040
VectorType *OtherVTy = VectorType::get (Member->getType (), State.VF );
0 commit comments