@@ -2863,10 +2863,21 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
2863
2863
// Scalable vectors cannot use arbitrary shufflevectors (only splats), so
2864
2864
// must use intrinsics to interleave.
2865
2865
if (VecTy->isScalableTy ()) {
2866
- VectorType *WideVecTy = VectorType::getDoubleElementsVectorType (VecTy);
2867
- return Builder.CreateIntrinsic (WideVecTy, Intrinsic::vector_interleave2,
2868
- Vals,
2869
- /* FMFSource=*/ nullptr , Name);
2866
+ assert (isPowerOf2_32 (Factor) && " Unsupported interleave factor for "
2867
+ " scalable vectors, must be power of 2" );
2868
+ SmallVector<Value *> InterleavingValues (Vals);
2869
+ // When interleaving, the number of values will be shrunk until we have the
2870
+ // single final interleaved value.
2871
+ auto *InterleaveTy = cast<VectorType>(InterleavingValues[0 ]->getType ());
2872
+ for (unsigned Midpoint = Factor / 2 ; Midpoint > 0 ; Midpoint /= 2 ) {
2873
+ InterleaveTy = VectorType::getDoubleElementsVectorType (InterleaveTy);
2874
+ for (unsigned I = 0 ; I < Midpoint; ++I)
2875
+ InterleavingValues[I] = Builder.CreateIntrinsic (
2876
+ InterleaveTy, Intrinsic::vector_interleave2,
2877
+ {InterleavingValues[I], InterleavingValues[Midpoint + I]},
2878
+ /* FMFSource=*/ nullptr , Name);
2879
+ }
2880
+ return InterleavingValues[0 ];
2870
2881
}
2871
2882
2872
2883
// Fixed length. Start by concatenating all vectors into a wide vector.
@@ -2952,15 +2963,11 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
2952
2963
&InterleaveFactor](Value *MaskForGaps) -> Value * {
2953
2964
if (State.VF .isScalable ()) {
2954
2965
assert (!MaskForGaps && " Interleaved groups with gaps are not supported." );
2955
- assert (InterleaveFactor == 2 &&
2966
+ assert (isPowerOf2_32 ( InterleaveFactor) &&
2956
2967
" Unsupported deinterleave factor for scalable vectors" );
2957
2968
auto *ResBlockInMask = State.get (BlockInMask);
2958
- SmallVector<Value *, 2 > Ops = {ResBlockInMask, ResBlockInMask};
2959
- auto *MaskTy = VectorType::get (State.Builder .getInt1Ty (),
2960
- State.VF .getKnownMinValue () * 2 , true );
2961
- return State.Builder .CreateIntrinsic (
2962
- MaskTy, Intrinsic::vector_interleave2, Ops,
2963
- /* FMFSource=*/ nullptr , " interleaved.mask" );
2969
+ SmallVector<Value *> Ops (InterleaveFactor, ResBlockInMask);
2970
+ return interleaveVectors (State.Builder , Ops, " interleaved.mask" );
2964
2971
}
2965
2972
2966
2973
if (!BlockInMask)
@@ -3000,22 +3007,48 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
3000
3007
ArrayRef<VPValue *> VPDefs = definedValues ();
3001
3008
const DataLayout &DL = State.CFG .PrevBB ->getDataLayout ();
3002
3009
if (VecTy->isScalableTy ()) {
3003
- assert (InterleaveFactor == 2 &&
3010
+ assert (isPowerOf2_32 ( InterleaveFactor) &&
3004
3011
" Unsupported deinterleave factor for scalable vectors" );
3005
3012
3006
- // Scalable vectors cannot use arbitrary shufflevectors (only splats),
3007
- // so must use intrinsics to deinterleave.
3008
- Value *DI = State.Builder .CreateIntrinsic (
3009
- Intrinsic::vector_deinterleave2, VecTy, NewLoad,
3010
- /* FMFSource=*/ nullptr , " strided.vec" );
3011
- unsigned J = 0 ;
3012
- for (unsigned I = 0 ; I < InterleaveFactor; ++I) {
3013
- Instruction *Member = Group->getMember (I);
3013
+ // Scalable vectors cannot use arbitrary shufflevectors (only splats),
3014
+ // so must use intrinsics to deinterleave.
3015
+ SmallVector<Value *> DeinterleavedValues (InterleaveFactor);
3016
+ DeinterleavedValues[0 ] = NewLoad;
3017
+ // For InterleaveFactor > 2 we have to deinterleave recursively, because
3018
+ // the currently available deinterleave intrinsic only supports a factor
3019
+ // of 2. With a factor of exactly 2 the loop below exits after its first
3020
+ // iteration.
3021
+ // When deinterleaving, the number of values will double until we
3022
+ // have "InterleaveFactor".
3023
+ for (unsigned NumVectors = 1 ; NumVectors < InterleaveFactor;
3024
+ NumVectors *= 2 ) {
3025
+ // Deinterleave the elements within the vector
3026
+ SmallVector<Value *> TempDeinterleavedValues (NumVectors);
3027
+ for (unsigned I = 0 ; I < NumVectors; ++I) {
3028
+ auto *DiTy = DeinterleavedValues[I]->getType ();
3029
+ TempDeinterleavedValues[I] = State.Builder .CreateIntrinsic (
3030
+ Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
3031
+ /* FMFSource=*/ nullptr , " strided.vec" );
3032
+ }
3033
+ // Extract the deinterleaved values:
3034
+ for (unsigned I = 0 ; I < 2 ; ++I)
3035
+ for (unsigned J = 0 ; J < NumVectors; ++J)
3036
+ DeinterleavedValues[NumVectors * I + J] =
3037
+ State.Builder .CreateExtractValue (TempDeinterleavedValues[J], I);
3038
+ }
3014
3039
3015
- if (!Member)
3040
+ #ifndef NDEBUG
3041
+ for (Value *Val : DeinterleavedValues)
3042
+ assert (Val && " NULL Deinterleaved Value" );
3043
+ #endif
3044
+ for (unsigned I = 0 , J = 0 ; I < InterleaveFactor; ++I) {
3045
+ Instruction *Member = Group->getMember (I);
3046
+ Value *StridedVec = DeinterleavedValues[I];
3047
+ if (!Member) {
3048
+ // The group has no member at this index, so the extracted value is unused; erase it.
3049
+ cast<Instruction>(StridedVec)->eraseFromParent ();
3016
3050
continue ;
3017
-
3018
- Value *StridedVec = State.Builder .CreateExtractValue (DI, I);
3051
+ }
3019
3052
// If this member has different type, cast the result type.
3020
3053
if (Member->getType () != ScalarTy) {
3021
3054
VectorType *OtherVTy = VectorType::get (Member->getType (), State.VF );
0 commit comments