@@ -2868,10 +2868,21 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
2868
2868
// Scalable vectors cannot use arbitrary shufflevectors (only splats), so
2869
2869
// must use intrinsics to interleave.
2870
2870
if (VecTy->isScalableTy ()) {
2871
- VectorType *WideVecTy = VectorType::getDoubleElementsVectorType (VecTy);
2872
- return Builder.CreateIntrinsic (WideVecTy, Intrinsic::vector_interleave2,
2873
- Vals,
2874
- /* FMFSource=*/ nullptr , Name);
2871
+ assert (isPowerOf2_32 (Factor) && " Unsupported interleave factor for "
2872
+ " scalable vectors, must be power of 2" );
2873
+ SmallVector<Value *> InterleavingValues (Vals);
2874
+ // Each pass interleaves pairs of values, halving how many remain, until only
2875
+ // the single final interleaved value is left.
2876
+ auto *InterleaveTy = cast<VectorType>(InterleavingValues[0 ]->getType ());
2877
+ for (unsigned Midpoint = Factor / 2 ; Midpoint > 0 ; Midpoint /= 2 ) {
2878
+ InterleaveTy = VectorType::getDoubleElementsVectorType (InterleaveTy);
2879
+ for (unsigned I = 0 ; I < Midpoint; ++I)
2880
+ InterleavingValues[I] = Builder.CreateIntrinsic (
2881
+ InterleaveTy, Intrinsic::vector_interleave2,
2882
+ {InterleavingValues[I], InterleavingValues[Midpoint + I]},
2883
+ /* FMFSource=*/ nullptr , Name);
2884
+ }
2885
+ return InterleavingValues[0 ];
2875
2886
}
2876
2887
2877
2888
// Fixed length. Start by concatenating all vectors into a wide vector.
@@ -2957,15 +2968,11 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
2957
2968
&InterleaveFactor](Value *MaskForGaps) -> Value * {
2958
2969
if (State.VF .isScalable ()) {
2959
2970
assert (!MaskForGaps && " Interleaved groups with gaps are not supported." );
2960
- assert (InterleaveFactor == 2 &&
2971
+ assert (isPowerOf2_32 ( InterleaveFactor) &&
2961
2972
" Unsupported deinterleave factor for scalable vectors" );
2962
2973
auto *ResBlockInMask = State.get (BlockInMask);
2963
- SmallVector<Value *, 2 > Ops = {ResBlockInMask, ResBlockInMask};
2964
- auto *MaskTy = VectorType::get (State.Builder .getInt1Ty (),
2965
- State.VF .getKnownMinValue () * 2 , true );
2966
- return State.Builder .CreateIntrinsic (
2967
- MaskTy, Intrinsic::vector_interleave2, Ops,
2968
- /* FMFSource=*/ nullptr , " interleaved.mask" );
2974
+ SmallVector<Value *> Ops (InterleaveFactor, ResBlockInMask);
2975
+ return interleaveVectors (State.Builder , Ops, " interleaved.mask" );
2969
2976
}
2970
2977
2971
2978
if (!BlockInMask)
@@ -3005,22 +3012,48 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
3005
3012
ArrayRef<VPValue *> VPDefs = definedValues ();
3006
3013
const DataLayout &DL = State.CFG .PrevBB ->getDataLayout ();
3007
3014
if (VecTy->isScalableTy ()) {
3008
- assert (InterleaveFactor == 2 &&
3015
+ assert (isPowerOf2_32 ( InterleaveFactor) &&
3009
3016
" Unsupported deinterleave factor for scalable vectors" );
3010
3017
3011
- // Scalable vectors cannot use arbitrary shufflevectors (only splats),
3012
- // so must use intrinsics to deinterleave.
3013
- Value *DI = State.Builder .CreateIntrinsic (
3014
- Intrinsic::vector_deinterleave2, VecTy, NewLoad,
3015
- /* FMFSource=*/ nullptr , " strided.vec" );
3016
- unsigned J = 0 ;
3017
- for (unsigned I = 0 ; I < InterleaveFactor; ++I) {
3018
- Instruction *Member = Group->getMember (I);
3018
+ // Scalable vectors cannot use arbitrary shufflevectors (only splats),
3019
+ // so must use intrinsics to deinterleave.
3020
+ SmallVector<Value *> DeinterleavedValues (InterleaveFactor);
3021
+ DeinterleavedValues[0 ] = NewLoad;
3022
+ // When InterleaveFactor > 2 we have to deinterleave recursively, because
3023
+ // the currently available deinterleave intrinsic only supports a factor
3024
+ // of 2. With a factor of 2 the loop below exits after its first
3025
+ // iteration.
3026
+ // When deinterleaving, the number of values will double until we
3027
+ // have "InterleaveFactor".
3028
+ for (unsigned NumVectors = 1 ; NumVectors < InterleaveFactor;
3029
+ NumVectors *= 2 ) {
3030
+ // Deinterleave the elements within the vector
3031
+ SmallVector<Value *> TempDeinterleavedValues (NumVectors);
3032
+ for (unsigned I = 0 ; I < NumVectors; ++I) {
3033
+ auto *DiTy = DeinterleavedValues[I]->getType ();
3034
+ TempDeinterleavedValues[I] = State.Builder .CreateIntrinsic (
3035
+ Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
3036
+ /* FMFSource=*/ nullptr , " strided.vec" );
3037
+ }
3038
+ // Extract the deinterleaved values:
3039
+ for (unsigned I = 0 ; I < 2 ; ++I)
3040
+ for (unsigned J = 0 ; J < NumVectors; ++J)
3041
+ DeinterleavedValues[NumVectors * I + J] =
3042
+ State.Builder .CreateExtractValue (TempDeinterleavedValues[J], I);
3043
+ }
3019
3044
3020
- if (!Member)
3045
+ #ifndef NDEBUG
3046
+ for (Value *Val : DeinterleavedValues)
3047
+ assert (Val && " NULL Deinterleaved Value" );
3048
+ #endif
3049
+ for (unsigned I = 0 , J = 0 ; I < InterleaveFactor; ++I) {
3050
+ Instruction *Member = Group->getMember (I);
3051
+ Value *StridedVec = DeinterleavedValues[I];
3052
+ if (!Member) {
3053
+ // The group has no member at this index, so this deinterleaved value is unused.
3054
+ cast<Instruction>(StridedVec)->eraseFromParent ();
3021
3055
continue ;
3022
-
3023
- Value *StridedVec = State.Builder .CreateExtractValue (DI, I);
3056
+ }
3024
3057
// If this member has different type, cast the result type.
3025
3058
if (Member->getType () != ScalarTy) {
3026
3059
VectorType *OtherVTy = VectorType::get (Member->getType (), State.VF );
0 commit comments