@@ -2789,21 +2789,10 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
2789
2789
// Scalable vectors cannot use arbitrary shufflevectors (only splats), so
2790
2790
// must use intrinsics to interleave.
2791
2791
if (VecTy->isScalableTy ()) {
2792
- assert (isPowerOf2_32 (Factor) && " Unsupported interleave factor for "
2793
- " scalable vectors, must be power of 2" );
2794
- SmallVector<Value *> InterleavingValues (Vals);
2795
- // When interleaving, the number of values will be shrunk until we have the
2796
- // single final interleaved value.
2797
- auto *InterleaveTy = cast<VectorType>(InterleavingValues[0 ]->getType ());
2798
- for (unsigned Midpoint = Factor / 2 ; Midpoint > 0 ; Midpoint /= 2 ) {
2799
- InterleaveTy = VectorType::getDoubleElementsVectorType (InterleaveTy);
2800
- for (unsigned I = 0 ; I < Midpoint; ++I)
2801
- InterleavingValues[I] = Builder.CreateIntrinsic (
2802
- InterleaveTy, Intrinsic::vector_interleave2,
2803
- {InterleavingValues[I], InterleavingValues[Midpoint + I]},
2804
- /* FMFSource=*/ nullptr , Name);
2805
- }
2806
- return InterleavingValues[0 ];
2792
+ VectorType *WideVecTy = VectorType::getDoubleElementsVectorType (VecTy);
2793
+ return Builder.CreateIntrinsic (WideVecTy, Intrinsic::vector_interleave2,
2794
+ Vals,
2795
+ /* FMFSource=*/ nullptr , Name);
2807
2796
}
2808
2797
2809
2798
// Fixed length. Start by concatenating all vectors into a wide vector.
@@ -2889,11 +2878,15 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
2889
2878
&InterleaveFactor](Value *MaskForGaps) -> Value * {
2890
2879
if (State.VF .isScalable ()) {
2891
2880
assert (!MaskForGaps && " Interleaved groups with gaps are not supported." );
2892
- assert (isPowerOf2_32 ( InterleaveFactor) &&
2881
+ assert (InterleaveFactor == 2 &&
2893
2882
" Unsupported deinterleave factor for scalable vectors" );
2894
2883
auto *ResBlockInMask = State.get (BlockInMask);
2895
- SmallVector<Value *> Ops (InterleaveFactor, ResBlockInMask);
2896
- return interleaveVectors (State.Builder , Ops, " interleaved.mask" );
2884
+ SmallVector<Value *, 2 > Ops = {ResBlockInMask, ResBlockInMask};
2885
+ auto *MaskTy = VectorType::get (State.Builder .getInt1Ty (),
2886
+ State.VF .getKnownMinValue () * 2 , true );
2887
+ return State.Builder .CreateIntrinsic (
2888
+ MaskTy, Intrinsic::vector_interleave2, Ops,
2889
+ /* FMFSource=*/ nullptr , " interleaved.mask" );
2897
2890
}
2898
2891
2899
2892
if (!BlockInMask)
@@ -2933,48 +2926,22 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
2933
2926
ArrayRef<VPValue *> VPDefs = definedValues ();
2934
2927
const DataLayout &DL = State.CFG .PrevBB ->getDataLayout ();
2935
2928
if (VecTy->isScalableTy ()) {
2936
- assert (isPowerOf2_32 ( InterleaveFactor) &&
2929
+ assert (InterleaveFactor == 2 &&
2937
2930
" Unsupported deinterleave factor for scalable vectors" );
2938
2931
2939
- // Scalable vectors cannot use arbitrary shufflevectors (only splats),
2940
- // so must use intrinsics to deinterleave.
2941
- SmallVector<Value *> DeinterleavedValues (InterleaveFactor);
2942
- DeinterleavedValues[0 ] = NewLoad;
2943
- // For the case of InterleaveFactor > 2, we will have to do recursive
2944
- // deinterleaving, because the current available deinterleave intrinsic
2945
- // supports only Factor of 2, otherwise it will bailout after first
2946
- // iteration.
2947
- // When deinterleaving, the number of values will double until we
2948
- // have "InterleaveFactor".
2949
- for (unsigned NumVectors = 1 ; NumVectors < InterleaveFactor;
2950
- NumVectors *= 2 ) {
2951
- // Deinterleave the elements within the vector
2952
- SmallVector<Value *> TempDeinterleavedValues (NumVectors);
2953
- for (unsigned I = 0 ; I < NumVectors; ++I) {
2954
- auto *DiTy = DeinterleavedValues[I]->getType ();
2955
- TempDeinterleavedValues[I] = State.Builder .CreateIntrinsic (
2956
- Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
2957
- /* FMFSource=*/ nullptr , " strided.vec" );
2958
- }
2959
- // Extract the deinterleaved values:
2960
- for (unsigned I = 0 ; I < 2 ; ++I)
2961
- for (unsigned J = 0 ; J < NumVectors; ++J)
2962
- DeinterleavedValues[NumVectors * I + J] =
2963
- State.Builder .CreateExtractValue (TempDeinterleavedValues[J], I);
2964
- }
2965
-
2966
- #ifndef NDEBUG
2967
- for (Value *Val : DeinterleavedValues)
2968
- assert (Val && " NULL Deinterleaved Value" );
2969
- #endif
2970
- for (unsigned I = 0 , J = 0 ; I < InterleaveFactor; ++I) {
2932
+ // Scalable vectors cannot use arbitrary shufflevectors (only splats),
2933
+ // so must use intrinsics to deinterleave.
2934
+ Value *DI = State.Builder .CreateIntrinsic (
2935
+ Intrinsic::vector_deinterleave2, VecTy, NewLoad,
2936
+ /* FMFSource=*/ nullptr , " strided.vec" );
2937
+ unsigned J = 0 ;
2938
+ for (unsigned I = 0 ; I < InterleaveFactor; ++I) {
2971
2939
Instruction *Member = Group->getMember (I);
2972
- Value *StridedVec = DeinterleavedValues[I];
2973
- if (!Member) {
2974
- // This value is not needed as it's not used
2975
- static_cast <Instruction *>(StridedVec)->eraseFromParent ();
2940
+
2941
+ if (!Member)
2976
2942
continue ;
2977
- }
2943
+
2944
+ Value *StridedVec = State.Builder .CreateExtractValue (DI, I);
2978
2945
// If this member has different type, cast the result type.
2979
2946
if (Member->getType () != ScalarTy) {
2980
2947
VectorType *OtherVTy = VectorType::get (Member->getType (), State.VF );
0 commit comments