@@ -2561,10 +2561,15 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
2561
2561
const Value *Ptr = getLoadStorePointerOperand (&Ingredient);
2562
2562
assert (!Reverse &&
2563
2563
" Inconsecutive memory access should not have the order." );
2564
- return Ctx.TTI .getAddressComputationCost (Ty) +
2565
- Ctx.TTI .getGatherScatterOpCost (Ingredient.getOpcode (), Ty, Ptr,
2566
- IsMasked, Alignment, Ctx.CostKind ,
2567
- &Ingredient);
2564
+ if (Strided)
2565
+ return Ctx.TTI .getStridedMemoryOpCost (Ingredient.getOpcode (), Ty, Ptr,
2566
+ IsMasked, Alignment, Ctx.CostKind ,
2567
+ &Ingredient);
2568
+ else
2569
+ return Ctx.TTI .getAddressComputationCost (Ty) +
2570
+ Ctx.TTI .getGatherScatterOpCost (Ingredient.getOpcode (), Ty, Ptr,
2571
+ IsMasked, Alignment, Ctx.CostKind ,
2572
+ &Ingredient);
2568
2573
}
2569
2574
2570
2575
InstructionCost Cost = 0 ;
@@ -2591,11 +2596,13 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
2591
2596
Type *ScalarDataTy = getLoadStoreType (&Ingredient);
2592
2597
auto *DataTy = VectorType::get (ScalarDataTy, State.VF );
2593
2598
const Align Alignment = getLoadStoreAlignment (&Ingredient);
2594
- bool CreateGather = !isConsecutive ();
2599
+ bool CreateGather = !isConsecutive () && ! isStrided () ;
2595
2600
2596
2601
auto &Builder = State.Builder ;
2597
2602
State.setDebugLocFrom (getDebugLoc ());
2598
- Value *Mask = nullptr ;
2603
+ Value *Mask = isStrided ()
2604
+ ? Builder.CreateVectorSplat (State.VF , Builder.getTrue ())
2605
+ : nullptr ;
2599
2606
if (auto *VPMask = getMask ()) {
2600
2607
// Mask reversal is only needed for non-all-one (null) masks, as reverse
2601
2608
// of a null all-one mask is a null mask.
@@ -2610,9 +2617,25 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
2610
2617
NewLI = Builder.CreateMaskedGather (DataTy, Addr, Alignment, Mask, nullptr ,
2611
2618
" wide.masked.gather" );
2612
2619
} else if (Mask) {
2613
- NewLI =
2614
- Builder.CreateMaskedLoad (DataTy, Addr, Alignment, Mask,
2615
- PoisonValue::get (DataTy), " wide.masked.load" );
2620
+ if (isStrided ()) {
2621
+ const DataLayout &DL = LI->getDataLayout ();
2622
+ auto *PtrTy = Addr->getType ();
2623
+ auto *StrideTy = DL.getIndexType (PtrTy);
2624
+ // TODO: Support non-unit-reverse strided accesses.
2625
+ auto *StrideVal =
2626
+ ConstantInt::get (StrideTy, -1 * DL.getTypeAllocSize (ScalarDataTy));
2627
+ Value *RuntimeVF =
2628
+ getRuntimeVF (State.Builder , State.Builder .getInt32Ty (), State.VF );
2629
+ NewLI = Builder.CreateIntrinsic (
2630
+ Intrinsic::experimental_vp_strided_load, {DataTy, PtrTy, StrideTy},
2631
+ {Addr, StrideVal, Mask, RuntimeVF}, nullptr , " wide.strided.load" );
2632
+ cast<CallInst>(NewLI)->addParamAttr (
2633
+ 0 , Attribute::getWithAlignment (NewLI->getContext (), Alignment));
2634
+ } else {
2635
+ NewLI = Builder.CreateMaskedLoad (DataTy, Addr, Alignment, Mask,
2636
+ PoisonValue::get (DataTy),
2637
+ " wide.masked.load" );
2638
+ }
2616
2639
} else {
2617
2640
NewLI = Builder.CreateAlignedLoad (DataTy, Addr, Alignment, " wide.load" );
2618
2641
}
@@ -2650,7 +2673,7 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
2650
2673
Type *ScalarDataTy = getLoadStoreType (&Ingredient);
2651
2674
auto *DataTy = VectorType::get (ScalarDataTy, State.VF );
2652
2675
const Align Alignment = getLoadStoreAlignment (&Ingredient);
2653
- bool CreateGather = !isConsecutive ();
2676
+ bool CreateGather = !isConsecutive () && ! isStrided () ;
2654
2677
2655
2678
auto &Builder = State.Builder ;
2656
2679
State.setDebugLocFrom (getDebugLoc ());
@@ -2670,6 +2693,16 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
2670
2693
NewLI =
2671
2694
Builder.CreateIntrinsic (DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
2672
2695
nullptr , " wide.masked.gather" );
2696
+ } else if (isStrided ()) {
2697
+ const DataLayout &DL = LI->getDataLayout ();
2698
+ auto *PtrTy = Addr->getType ();
2699
+ auto *StrideTy = DL.getIndexType (PtrTy);
2700
+ // TODO: Support non-unit-reverse strided accesses.
2701
+ auto *StrideVal =
2702
+ ConstantInt::get (StrideTy, -1 * DL.getTypeAllocSize (ScalarDataTy));
2703
+ NewLI = Builder.CreateIntrinsic (
2704
+ Intrinsic::experimental_vp_strided_load, {DataTy, PtrTy, StrideTy},
2705
+ {Addr, StrideVal, Mask, EVL}, nullptr , " wide.strided.load" );
2673
2706
} else {
2674
2707
VectorBuilder VBuilder (Builder);
2675
2708
VBuilder.setEVL (EVL).setMask (Mask);
@@ -2724,13 +2757,15 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
2724
2757
auto *SI = cast<StoreInst>(&Ingredient);
2725
2758
2726
2759
VPValue *StoredVPValue = getStoredValue ();
2727
- bool CreateScatter = !isConsecutive ();
2760
+ bool CreateScatter = !isConsecutive () && ! isStrided () ;
2728
2761
const Align Alignment = getLoadStoreAlignment (&Ingredient);
2729
2762
2730
2763
auto &Builder = State.Builder ;
2731
2764
State.setDebugLocFrom (getDebugLoc ());
2732
2765
2733
- Value *Mask = nullptr ;
2766
+ Value *Mask = isStrided ()
2767
+ ? Builder.CreateVectorSplat (State.VF , Builder.getTrue ())
2768
+ : nullptr ;
2734
2769
if (auto *VPMask = getMask ()) {
2735
2770
// Mask reversal is only needed for non-all-one (null) masks, as reverse
2736
2771
// of a null all-one mask is a null mask.
@@ -2749,12 +2784,32 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
2749
2784
}
2750
2785
Value *Addr = State.get (getAddr (), /* IsScalar*/ !CreateScatter);
2751
2786
Instruction *NewSI = nullptr ;
2752
- if (CreateScatter)
2787
+ if (CreateScatter) {
2753
2788
NewSI = Builder.CreateMaskedScatter (StoredVal, Addr, Alignment, Mask);
2754
- else if (Mask)
2755
- NewSI = Builder.CreateMaskedStore (StoredVal, Addr, Alignment, Mask);
2756
- else
2789
+ } else if (Mask) {
2790
+ if (isStrided ()) {
2791
+ const DataLayout &DL = SI->getDataLayout ();
2792
+ auto *StoredVecTy = cast<VectorType>(StoredVal->getType ());
2793
+ Type *StoredEltTy = StoredVecTy->getElementType ();
2794
+ auto *PtrTy = Addr->getType ();
2795
+ auto *StrideTy = DL.getIndexType (PtrTy);
2796
+ // TODO: Support non-unit-reverse strided accesses.
2797
+ auto *StrideVal =
2798
+ ConstantInt::get (StrideTy, -1 * DL.getTypeAllocSize (StoredEltTy));
2799
+ Value *RuntimeVF =
2800
+ getRuntimeVF (State.Builder , State.Builder .getInt32Ty (), State.VF );
2801
+ NewSI = Builder.CreateIntrinsic (
2802
+ Intrinsic::experimental_vp_strided_store,
2803
+ {StoredVecTy, PtrTy, StrideTy},
2804
+ {StoredVal, Addr, StrideVal, Mask, RuntimeVF});
2805
+ cast<CallInst>(NewSI)->addParamAttr (
2806
+ 1 , Attribute::getWithAlignment (NewSI->getContext (), Alignment));
2807
+ } else {
2808
+ NewSI = Builder.CreateMaskedStore (StoredVal, Addr, Alignment, Mask);
2809
+ }
2810
+ } else {
2757
2811
NewSI = Builder.CreateAlignedStore (StoredVal, Addr, Alignment);
2812
+ }
2758
2813
State.addMetadata (NewSI, SI);
2759
2814
}
2760
2815
@@ -2770,7 +2825,7 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
2770
2825
auto *SI = cast<StoreInst>(&Ingredient);
2771
2826
2772
2827
VPValue *StoredValue = getStoredValue ();
2773
- bool CreateScatter = !isConsecutive ();
2828
+ bool CreateScatter = !isConsecutive () && ! isStrided () ;
2774
2829
const Align Alignment = getLoadStoreAlignment (&Ingredient);
2775
2830
2776
2831
auto &Builder = State.Builder ;
@@ -2795,11 +2850,25 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
2795
2850
Intrinsic::vp_scatter,
2796
2851
{StoredVal, Addr, Mask, EVL});
2797
2852
} else {
2798
- VectorBuilder VBuilder (Builder);
2799
- VBuilder.setEVL (EVL).setMask (Mask);
2800
- NewSI = cast<CallInst>(VBuilder.createVectorInstruction (
2801
- Instruction::Store, Type::getVoidTy (EVL->getContext ()),
2802
- {StoredVal, Addr}));
2853
+ if (isStrided ()) {
2854
+ const DataLayout &DL = SI->getDataLayout ();
2855
+ auto *StoredVecTy = cast<VectorType>(StoredVal->getType ());
2856
+ Type *StoredEltTy = StoredVecTy->getElementType ();
2857
+ auto *PtrTy = Addr->getType ();
2858
+ auto *StrideTy = DL.getIndexType (PtrTy);
2859
+ // TODO: Support non-unit-reverse strided accesses.
2860
+ auto *StrideVal =
2861
+ ConstantInt::get (StrideTy, -1 * DL.getTypeAllocSize (StoredEltTy));
2862
+ NewSI = Builder.CreateIntrinsic (Intrinsic::experimental_vp_strided_store,
2863
+ {StoredVecTy, PtrTy, StrideTy},
2864
+ {StoredVal, Addr, StrideVal, Mask, EVL});
2865
+ } else {
2866
+ VectorBuilder VBuilder (Builder);
2867
+ VBuilder.setEVL (EVL).setMask (Mask);
2868
+ NewSI = cast<CallInst>(VBuilder.createVectorInstruction (
2869
+ Instruction::Store, Type::getVoidTy (EVL->getContext ()),
2870
+ {StoredVal, Addr}));
2871
+ }
2803
2872
}
2804
2873
NewSI->addParamAttr (
2805
2874
1 , Attribute::getWithAlignment (NewSI->getContext (), Alignment));
0 commit comments