@@ -2609,10 +2609,15 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
2609
2609
const Value *Ptr = getLoadStorePointerOperand (&Ingredient);
2610
2610
assert (!Reverse &&
2611
2611
" Inconsecutive memory access should not have the order." );
2612
- return Ctx.TTI .getAddressComputationCost (Ty) +
2613
- Ctx.TTI .getGatherScatterOpCost (Ingredient.getOpcode (), Ty, Ptr,
2614
- IsMasked, Alignment, Ctx.CostKind ,
2615
- &Ingredient);
2612
+ if (Strided)
2613
+ return Ctx.TTI .getStridedMemoryOpCost (Ingredient.getOpcode (), Ty, Ptr,
2614
+ IsMasked, Alignment, Ctx.CostKind ,
2615
+ &Ingredient);
2616
+ else
2617
+ return Ctx.TTI .getAddressComputationCost (Ty) +
2618
+ Ctx.TTI .getGatherScatterOpCost (Ingredient.getOpcode (), Ty, Ptr,
2619
+ IsMasked, Alignment, Ctx.CostKind ,
2620
+ &Ingredient);
2616
2621
}
2617
2622
2618
2623
InstructionCost Cost = 0 ;
@@ -2639,11 +2644,13 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
2639
2644
Type *ScalarDataTy = getLoadStoreType (&Ingredient);
2640
2645
auto *DataTy = VectorType::get (ScalarDataTy, State.VF );
2641
2646
const Align Alignment = getLoadStoreAlignment (&Ingredient);
2642
- bool CreateGather = !isConsecutive ();
2647
+ bool CreateGather = !isConsecutive () && ! isStrided () ;
2643
2648
2644
2649
auto &Builder = State.Builder ;
2645
2650
State.setDebugLocFrom (getDebugLoc ());
2646
- Value *Mask = nullptr ;
2651
+ Value *Mask = isStrided ()
2652
+ ? Builder.CreateVectorSplat (State.VF , Builder.getTrue ())
2653
+ : nullptr ;
2647
2654
if (auto *VPMask = getMask ()) {
2648
2655
// Mask reversal is only needed for non-all-one (null) masks, as reverse
2649
2656
// of a null all-one mask is a null mask.
@@ -2658,9 +2665,25 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
2658
2665
NewLI = Builder.CreateMaskedGather (DataTy, Addr, Alignment, Mask, nullptr ,
2659
2666
" wide.masked.gather" );
2660
2667
} else if (Mask) {
2661
- NewLI =
2662
- Builder.CreateMaskedLoad (DataTy, Addr, Alignment, Mask,
2663
- PoisonValue::get (DataTy), " wide.masked.load" );
2668
+ if (isStrided ()) {
2669
+ const DataLayout &DL = LI->getDataLayout ();
2670
+ auto *PtrTy = Addr->getType ();
2671
+ auto *StrideTy = DL.getIndexType (PtrTy);
2672
+ // TODO: Support non-unit-reverse strided accesses.
2673
+ auto *StrideVal =
2674
+ ConstantInt::get (StrideTy, -1 * DL.getTypeAllocSize (ScalarDataTy));
2675
+ Value *RuntimeVF =
2676
+ getRuntimeVF (State.Builder , State.Builder .getInt32Ty (), State.VF );
2677
+ NewLI = Builder.CreateIntrinsic (
2678
+ Intrinsic::experimental_vp_strided_load, {DataTy, PtrTy, StrideTy},
2679
+ {Addr, StrideVal, Mask, RuntimeVF}, nullptr , " wide.strided.load" );
2680
+ cast<CallInst>(NewLI)->addParamAttr (
2681
+ 0 , Attribute::getWithAlignment (NewLI->getContext (), Alignment));
2682
+ } else {
2683
+ NewLI = Builder.CreateMaskedLoad (DataTy, Addr, Alignment, Mask,
2684
+ PoisonValue::get (DataTy),
2685
+ " wide.masked.load" );
2686
+ }
2664
2687
} else {
2665
2688
NewLI = Builder.CreateAlignedLoad (DataTy, Addr, Alignment, " wide.load" );
2666
2689
}
@@ -2698,7 +2721,7 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
2698
2721
Type *ScalarDataTy = getLoadStoreType (&Ingredient);
2699
2722
auto *DataTy = VectorType::get (ScalarDataTy, State.VF );
2700
2723
const Align Alignment = getLoadStoreAlignment (&Ingredient);
2701
- bool CreateGather = !isConsecutive ();
2724
+ bool CreateGather = !isConsecutive () && ! isStrided () ;
2702
2725
2703
2726
auto &Builder = State.Builder ;
2704
2727
State.setDebugLocFrom (getDebugLoc ());
@@ -2718,6 +2741,16 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
2718
2741
NewLI =
2719
2742
Builder.CreateIntrinsic (DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
2720
2743
nullptr , " wide.masked.gather" );
2744
+ } else if (isStrided ()) {
2745
+ const DataLayout &DL = LI->getDataLayout ();
2746
+ auto *PtrTy = Addr->getType ();
2747
+ auto *StrideTy = DL.getIndexType (PtrTy);
2748
+ // TODO: Support non-unit-reverse strided accesses.
2749
+ auto *StrideVal =
2750
+ ConstantInt::get (StrideTy, -1 * DL.getTypeAllocSize (ScalarDataTy));
2751
+ NewLI = Builder.CreateIntrinsic (
2752
+ Intrinsic::experimental_vp_strided_load, {DataTy, PtrTy, StrideTy},
2753
+ {Addr, StrideVal, Mask, EVL}, nullptr , " wide.strided.load" );
2721
2754
} else {
2722
2755
VectorBuilder VBuilder (Builder);
2723
2756
VBuilder.setEVL (EVL).setMask (Mask);
@@ -2772,13 +2805,15 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
2772
2805
auto *SI = cast<StoreInst>(&Ingredient);
2773
2806
2774
2807
VPValue *StoredVPValue = getStoredValue ();
2775
- bool CreateScatter = !isConsecutive ();
2808
+ bool CreateScatter = !isConsecutive () && ! isStrided () ;
2776
2809
const Align Alignment = getLoadStoreAlignment (&Ingredient);
2777
2810
2778
2811
auto &Builder = State.Builder ;
2779
2812
State.setDebugLocFrom (getDebugLoc ());
2780
2813
2781
- Value *Mask = nullptr ;
2814
+ Value *Mask = isStrided ()
2815
+ ? Builder.CreateVectorSplat (State.VF , Builder.getTrue ())
2816
+ : nullptr ;
2782
2817
if (auto *VPMask = getMask ()) {
2783
2818
// Mask reversal is only needed for non-all-one (null) masks, as reverse
2784
2819
// of a null all-one mask is a null mask.
@@ -2797,12 +2832,32 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
2797
2832
}
2798
2833
Value *Addr = State.get (getAddr (), /* IsScalar*/ !CreateScatter);
2799
2834
Instruction *NewSI = nullptr ;
2800
- if (CreateScatter)
2835
+ if (CreateScatter) {
2801
2836
NewSI = Builder.CreateMaskedScatter (StoredVal, Addr, Alignment, Mask);
2802
- else if (Mask)
2803
- NewSI = Builder.CreateMaskedStore (StoredVal, Addr, Alignment, Mask);
2804
- else
2837
+ } else if (Mask) {
2838
+ if (isStrided ()) {
2839
+ const DataLayout &DL = SI->getDataLayout ();
2840
+ auto *StoredVecTy = cast<VectorType>(StoredVal->getType ());
2841
+ Type *StoredEltTy = StoredVecTy->getElementType ();
2842
+ auto *PtrTy = Addr->getType ();
2843
+ auto *StrideTy = DL.getIndexType (PtrTy);
2844
+ // TODO: Support non-unit-reverse strided accesses.
2845
+ auto *StrideVal =
2846
+ ConstantInt::get (StrideTy, -1 * DL.getTypeAllocSize (StoredEltTy));
2847
+ Value *RuntimeVF =
2848
+ getRuntimeVF (State.Builder , State.Builder .getInt32Ty (), State.VF );
2849
+ NewSI = Builder.CreateIntrinsic (
2850
+ Intrinsic::experimental_vp_strided_store,
2851
+ {StoredVecTy, PtrTy, StrideTy},
2852
+ {StoredVal, Addr, StrideVal, Mask, RuntimeVF});
2853
+ cast<CallInst>(NewSI)->addParamAttr (
2854
+ 1 , Attribute::getWithAlignment (NewSI->getContext (), Alignment));
2855
+ } else {
2856
+ NewSI = Builder.CreateMaskedStore (StoredVal, Addr, Alignment, Mask);
2857
+ }
2858
+ } else {
2805
2859
NewSI = Builder.CreateAlignedStore (StoredVal, Addr, Alignment);
2860
+ }
2806
2861
State.addMetadata (NewSI, SI);
2807
2862
}
2808
2863
@@ -2818,7 +2873,7 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
2818
2873
auto *SI = cast<StoreInst>(&Ingredient);
2819
2874
2820
2875
VPValue *StoredValue = getStoredValue ();
2821
- bool CreateScatter = !isConsecutive ();
2876
+ bool CreateScatter = !isConsecutive () && ! isStrided () ;
2822
2877
const Align Alignment = getLoadStoreAlignment (&Ingredient);
2823
2878
2824
2879
auto &Builder = State.Builder ;
@@ -2843,11 +2898,25 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
2843
2898
Intrinsic::vp_scatter,
2844
2899
{StoredVal, Addr, Mask, EVL});
2845
2900
} else {
2846
- VectorBuilder VBuilder (Builder);
2847
- VBuilder.setEVL (EVL).setMask (Mask);
2848
- NewSI = cast<CallInst>(VBuilder.createVectorInstruction (
2849
- Instruction::Store, Type::getVoidTy (EVL->getContext ()),
2850
- {StoredVal, Addr}));
2901
+ if (isStrided ()) {
2902
+ const DataLayout &DL = SI->getDataLayout ();
2903
+ auto *StoredVecTy = cast<VectorType>(StoredVal->getType ());
2904
+ Type *StoredEltTy = StoredVecTy->getElementType ();
2905
+ auto *PtrTy = Addr->getType ();
2906
+ auto *StrideTy = DL.getIndexType (PtrTy);
2907
+ // TODO: Support non-unit-reverse strided accesses.
2908
+ auto *StrideVal =
2909
+ ConstantInt::get (StrideTy, -1 * DL.getTypeAllocSize (StoredEltTy));
2910
+ NewSI = Builder.CreateIntrinsic (Intrinsic::experimental_vp_strided_store,
2911
+ {StoredVecTy, PtrTy, StrideTy},
2912
+ {StoredVal, Addr, StrideVal, Mask, EVL});
2913
+ } else {
2914
+ VectorBuilder VBuilder (Builder);
2915
+ VBuilder.setEVL (EVL).setMask (Mask);
2916
+ NewSI = cast<CallInst>(VBuilder.createVectorInstruction (
2917
+ Instruction::Store, Type::getVoidTy (EVL->getContext ()),
2918
+ {StoredVal, Addr}));
2919
+ }
2851
2920
}
2852
2921
NewSI->addParamAttr (
2853
2922
1 , Attribute::getWithAlignment (NewSI->getContext (), Alignment));
0 commit comments