Commit d7df8ea

Step 3: Patch the implementation of load/store recipes

1 parent 4cdcb91 commit d7df8ea

5 files changed: +118 -39 lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 2 deletions

@@ -8389,12 +8389,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     Ptr = VectorPtr;
   }
   if (LoadInst *Load = dyn_cast<LoadInst>(I))
-    return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
+    return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse, false,
                                  I->getDebugLoc());
 
   StoreInst *Store = cast<StoreInst>(I);
   return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
-                                Reverse, I->getDebugLoc());
+                                Reverse, false, I->getDebugLoc());
 }
 
 /// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
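Both call sites in this step pass false for the new Strided argument, so recipe construction is behaviorally unchanged; the flag only matters once something sets it. A hypothetical sketch of what a later step's call site might look like (the helper below is invented for illustration, assumes the internal VPlan.h header, and is not part of this commit):

#include "llvm/IR/Instructions.h"
#include "VPlan.h" // internal header; include path assumed

using namespace llvm;

// Hypothetical follow-up: build a load recipe with Strided actually set,
// once analysis has proven the access advances by a fixed stride.
static VPWidenLoadRecipe *buildStridedLoadRecipe(LoadInst &Load, VPValue *Ptr,
                                                 VPValue *Mask) {
  // A strided access is neither consecutive nor reverse-consecutive.
  return new VPWidenLoadRecipe(Load, Ptr, Mask, /*Consecutive=*/false,
                               /*Reverse=*/false, /*Strided=*/true,
                               Load.getDebugLoc());
}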

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 19 additions & 10 deletions

@@ -2611,6 +2611,9 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
   /// Whether the consecutive accessed addresses are in reverse order.
   bool Reverse;
 
+  /// Whether the accessed addresses are evenly spaced apart by a fixed stride.
+  bool Strided;
+
   /// Whether the memory access is masked.
   bool IsMasked = false;
 
@@ -2624,9 +2627,9 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
 
   VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
                       std::initializer_list<VPValue *> Operands,
-                      bool Consecutive, bool Reverse, DebugLoc DL)
+                      bool Consecutive, bool Reverse, bool Strided, DebugLoc DL)
       : VPRecipeBase(SC, Operands, DL), Ingredient(I), Consecutive(Consecutive),
-        Reverse(Reverse) {
+        Reverse(Reverse), Strided(Strided) {
     assert((Consecutive || !Reverse) && "Reverse implies consecutive");
   }
 
@@ -2654,6 +2657,10 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
   /// order.
   bool isReverse() const { return Reverse; }
 
+  /// Return whether the accessed addresses are evenly spaced apart by a fixed
+  /// stride.
+  bool isStrided() const { return Strided; }
+
   /// Return the address accessed by this recipe.
   VPValue *getAddr() const { return getOperand(0); }
 
@@ -2683,16 +2690,16 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
 /// optional mask.
 struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
   VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
-                    bool Consecutive, bool Reverse, DebugLoc DL)
+                    bool Consecutive, bool Reverse, bool Strided, DebugLoc DL)
       : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
-                            Reverse, DL),
+                            Reverse, Strided, DL),
         VPValue(this, &Load) {
     setMask(Mask);
   }
 
   VPWidenLoadRecipe *clone() override {
     return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
-                                 getMask(), Consecutive, Reverse,
+                                 getMask(), Consecutive, Reverse, Strided,
                                  getDebugLoc());
   }
 
@@ -2724,7 +2731,7 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
   VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
       : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
                             {L.getAddr(), &EVL}, L.isConsecutive(),
-                            L.isReverse(), L.getDebugLoc()),
+                            L.isReverse(), L.isStrided(), L.getDebugLoc()),
         VPValue(this, &getIngredient()) {
     setMask(Mask);
   }
 
@@ -2761,16 +2768,17 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
 /// to store to and an optional mask.
 struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
   VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
-                     VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
+                     VPValue *Mask, bool Consecutive, bool Reverse,
+                     bool Strided, DebugLoc DL)
       : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
-                            Consecutive, Reverse, DL) {
+                            Consecutive, Reverse, Strided, DL) {
     setMask(Mask);
   }
 
   VPWidenStoreRecipe *clone() override {
     return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
                                   getStoredValue(), getMask(), Consecutive,
-                                  Reverse, getDebugLoc());
+                                  Reverse, Strided, getDebugLoc());
   }
 
   VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
 
@@ -2804,7 +2812,8 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
   VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
       : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
                             {S.getAddr(), S.getStoredValue(), &EVL},
-                            S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
+                            S.isConsecutive(), S.isReverse(), S.isStrided(),
+                            S.getDebugLoc()) {
     setMask(Mask);
   }
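Only the pre-existing invariant is asserted: (Consecutive || !Reverse). The codegen changes below treat Strided as a refinement of the non-consecutive case (CreateGather/CreateScatter become !isConsecutive() && !isStrided()), so if Strided and Consecutive are meant to be mutually exclusive, a follow-up could strengthen the constructor check. A sketch of such an assert, hypothetical and not part of this commit:

  assert((Consecutive || !Reverse) && "Reverse implies consecutive");
  // Hypothetical addition: a strided access is by definition inconsecutive.
  assert((!Strided || !Consecutive) && "Strided implies inconsecutive");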

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 91 additions & 22 deletions

@@ -2609,10 +2609,15 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
     const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
     assert(!Reverse &&
            "Inconsecutive memory access should not have the order.");
-    return Ctx.TTI.getAddressComputationCost(Ty) +
-           Ctx.TTI.getGatherScatterOpCost(Ingredient.getOpcode(), Ty, Ptr,
-                                          IsMasked, Alignment, Ctx.CostKind,
-                                          &Ingredient);
+    if (Strided)
+      return Ctx.TTI.getStridedMemoryOpCost(Ingredient.getOpcode(), Ty, Ptr,
+                                            IsMasked, Alignment, Ctx.CostKind,
+                                            &Ingredient);
+    else
+      return Ctx.TTI.getAddressComputationCost(Ty) +
+             Ctx.TTI.getGatherScatterOpCost(Ingredient.getOpcode(), Ty, Ptr,
+                                            IsMasked, Alignment, Ctx.CostKind,
+                                            &Ingredient);
   }
 
   InstructionCost Cost = 0;
@@ -2639,11 +2644,13 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
   Type *ScalarDataTy = getLoadStoreType(&Ingredient);
   auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
-  bool CreateGather = !isConsecutive();
+  bool CreateGather = !isConsecutive() && !isStrided();
 
   auto &Builder = State.Builder;
   State.setDebugLocFrom(getDebugLoc());
-  Value *Mask = nullptr;
+  Value *Mask = isStrided()
+                    ? Builder.CreateVectorSplat(State.VF, Builder.getTrue())
+                    : nullptr;
   if (auto *VPMask = getMask()) {
     // Mask reversal is only needed for non-all-one (null) masks, as reverse
     // of a null all-one mask is a null mask.
@@ -2658,9 +2665,25 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
     NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
                                        "wide.masked.gather");
   } else if (Mask) {
-    NewLI =
-        Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
-                                 PoisonValue::get(DataTy), "wide.masked.load");
+    if (isStrided()) {
+      const DataLayout &DL = LI->getDataLayout();
+      auto *PtrTy = Addr->getType();
+      auto *StrideTy = DL.getIndexType(PtrTy);
+      // TODO: Support non-unit-reverse strided accesses.
+      auto *StrideVal =
+          ConstantInt::get(StrideTy, -1 * DL.getTypeAllocSize(ScalarDataTy));
+      Value *RuntimeVF =
+          getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
+      NewLI = Builder.CreateIntrinsic(
+          Intrinsic::experimental_vp_strided_load, {DataTy, PtrTy, StrideTy},
+          {Addr, StrideVal, Mask, RuntimeVF}, nullptr, "wide.strided.load");
+      cast<CallInst>(NewLI)->addParamAttr(
+          0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
+    } else {
+      NewLI = Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
+                                       PoisonValue::get(DataTy),
+                                       "wide.masked.load");
+    }
   } else {
     NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
  }
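For intuition about what the new branch emits: llvm.experimental.vp.strided.load reads lane i from Addr + i * Stride, with the stride given in bytes, for each active lane below the explicit vector length. With the stride hard-coded to -1 * TypeAllocSize above, lane i reads Addr - i * ElemSize, i.e. a unit-stride reverse access. A scalar reference model of that semantics (a sketch for intuition, not LLVM API):

// Scalar model of vp.strided.load: lane I reads Addr + I * StrideBytes.
// Inactive lanes (Mask[I] false or I >= EVL) are poison in the intrinsic;
// here they are simply left untouched.
template <typename T>
void stridedLoadRef(T *Out, const char *Addr, long long StrideBytes,
                    const bool *Mask, unsigned EVL) {
  for (unsigned I = 0; I < EVL; ++I)
    if (Mask[I])
      Out[I] = *reinterpret_cast<const T *>(Addr + I * StrideBytes);
}

Since this non-EVL recipe has no explicit vector length of its own, it splats an all-true mask and passes the runtime VF as the EVL operand; the EVL variant in the next hunks passes the loop's actual EVL instead.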
@@ -2698,7 +2721,7 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   Type *ScalarDataTy = getLoadStoreType(&Ingredient);
   auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
-  bool CreateGather = !isConsecutive();
+  bool CreateGather = !isConsecutive() && !isStrided();
 
   auto &Builder = State.Builder;
   State.setDebugLocFrom(getDebugLoc());
@@ -2718,6 +2741,16 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
     NewLI =
         Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
                                 nullptr, "wide.masked.gather");
+  } else if (isStrided()) {
+    const DataLayout &DL = LI->getDataLayout();
+    auto *PtrTy = Addr->getType();
+    auto *StrideTy = DL.getIndexType(PtrTy);
+    // TODO: Support non-unit-reverse strided accesses.
+    auto *StrideVal =
+        ConstantInt::get(StrideTy, -1 * DL.getTypeAllocSize(ScalarDataTy));
+    NewLI = Builder.CreateIntrinsic(
+        Intrinsic::experimental_vp_strided_load, {DataTy, PtrTy, StrideTy},
+        {Addr, StrideVal, Mask, EVL}, nullptr, "wide.strided.load");
   } else {
     VectorBuilder VBuilder(Builder);
     VBuilder.setEVL(EVL).setMask(Mask);
@@ -2772,13 +2805,15 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
   auto *SI = cast<StoreInst>(&Ingredient);
 
   VPValue *StoredVPValue = getStoredValue();
-  bool CreateScatter = !isConsecutive();
+  bool CreateScatter = !isConsecutive() && !isStrided();
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
 
   auto &Builder = State.Builder;
   State.setDebugLocFrom(getDebugLoc());
 
-  Value *Mask = nullptr;
+  Value *Mask = isStrided()
+                    ? Builder.CreateVectorSplat(State.VF, Builder.getTrue())
+                    : nullptr;
   if (auto *VPMask = getMask()) {
     // Mask reversal is only needed for non-all-one (null) masks, as reverse
     // of a null all-one mask is a null mask.
@@ -2797,12 +2832,32 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
   }
   Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
   Instruction *NewSI = nullptr;
-  if (CreateScatter)
+  if (CreateScatter) {
     NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
-  else if (Mask)
-    NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
-  else
+  } else if (Mask) {
+    if (isStrided()) {
+      const DataLayout &DL = SI->getDataLayout();
+      auto *StoredVecTy = cast<VectorType>(StoredVal->getType());
+      Type *StoredEltTy = StoredVecTy->getElementType();
+      auto *PtrTy = Addr->getType();
+      auto *StrideTy = DL.getIndexType(PtrTy);
+      // TODO: Support non-unit-reverse strided accesses.
+      auto *StrideVal =
+          ConstantInt::get(StrideTy, -1 * DL.getTypeAllocSize(StoredEltTy));
+      Value *RuntimeVF =
+          getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
+      NewSI = Builder.CreateIntrinsic(
+          Intrinsic::experimental_vp_strided_store,
+          {StoredVecTy, PtrTy, StrideTy},
+          {StoredVal, Addr, StrideVal, Mask, RuntimeVF});
+      cast<CallInst>(NewSI)->addParamAttr(
+          1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
+    } else {
+      NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
+    }
+  } else {
     NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
+  }
   State.addMetadata(NewSI, SI);
 }
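The store side mirrors the load: the same negative unit stride, with the runtime VF standing in for the EVL. One detail worth noting from the diff: the alignment attribute goes on parameter 0 for the load but parameter 1 for the store, because the pointer is the first operand of vp.strided.load and the second operand of vp.strided.store ({StoredVal, Addr, Stride, Mask, EVL}). The matching scalar reference model (again a semantics sketch, not LLVM API):

// Scalar model of vp.strided.store: lane I writes Addr + I * StrideBytes.
// Inactive lanes (Mask[I] false or I >= EVL) store nothing.
template <typename T>
void stridedStoreRef(const T *Val, char *Addr, long long StrideBytes,
                     const bool *Mask, unsigned EVL) {
  for (unsigned I = 0; I < EVL; ++I)
    if (Mask[I])
      *reinterpret_cast<T *>(Addr + I * StrideBytes) = Val[I];
}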

@@ -2818,7 +2873,7 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
   auto *SI = cast<StoreInst>(&Ingredient);
 
   VPValue *StoredValue = getStoredValue();
-  bool CreateScatter = !isConsecutive();
+  bool CreateScatter = !isConsecutive() && !isStrided();
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
 
   auto &Builder = State.Builder;
@@ -2843,11 +2898,25 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
                                     Intrinsic::vp_scatter,
                                     {StoredVal, Addr, Mask, EVL});
   } else {
-    VectorBuilder VBuilder(Builder);
-    VBuilder.setEVL(EVL).setMask(Mask);
-    NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
-        Instruction::Store, Type::getVoidTy(EVL->getContext()),
-        {StoredVal, Addr}));
+    if (isStrided()) {
+      const DataLayout &DL = SI->getDataLayout();
+      auto *StoredVecTy = cast<VectorType>(StoredVal->getType());
+      Type *StoredEltTy = StoredVecTy->getElementType();
+      auto *PtrTy = Addr->getType();
+      auto *StrideTy = DL.getIndexType(PtrTy);
+      // TODO: Support non-unit-reverse strided accesses.
+      auto *StrideVal =
+          ConstantInt::get(StrideTy, -1 * DL.getTypeAllocSize(StoredEltTy));
+      NewSI = Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_store,
+                                      {StoredVecTy, PtrTy, StrideTy},
+                                      {StoredVal, Addr, StrideVal, Mask, EVL});
+    } else {
+      VectorBuilder VBuilder(Builder);
+      VBuilder.setEVL(EVL).setMask(Mask);
+      NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
+          Instruction::Store, Type::getVoidTy(EVL->getContext()),
+          {StoredVal, Addr}));
+    }
   }
   NewSI->addParamAttr(
       1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
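Across all four execute paths the stride is hard-coded to -1 * TypeAllocSize, and each path carries a "Support non-unit-reverse strided accesses" TODO, so the initial target appears to be unit-stride reverse accesses, which would otherwise lower to a gather/scatter or to a contiguous access plus a vector reverse. The kind of loop that would benefit, assuming a later step actually sets the Strided flag:

// Both accesses step backwards by one element per iteration, i.e. a byte
// stride of -sizeof(int): candidates for vp.strided.load / vp.strided.store.
void reverseCopy(int *Dst, const int *Src, int N) {
  for (int I = N - 1; I >= 0; --I)
    Dst[I] = Src[I];
}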

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 2 additions & 2 deletions

@@ -73,13 +73,13 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
       if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
         NewRecipe = new VPWidenLoadRecipe(
             *Load, Ingredient.getOperand(0), nullptr /*Mask*/,
-            false /*Consecutive*/, false /*Reverse*/,
+            false /*Consecutive*/, false /*Reverse*/, false /*Strided*/,
             Ingredient.getDebugLoc());
       } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
         NewRecipe = new VPWidenStoreRecipe(
             *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
             nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/,
-            Ingredient.getDebugLoc());
+            false /*Strided*/, Ingredient.getDebugLoc());
       } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
         NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
       } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {

llvm/unittests/Transforms/Vectorize/VPlanTest.cpp

Lines changed: 4 additions & 3 deletions

@@ -1084,7 +1084,7 @@ TEST_F(VPRecipeTest, CastVPWidenMemoryRecipeToVPUserAndVPDef) {
       new LoadInst(Int32, PoisonValue::get(Int32Ptr), "", false, Align(1));
   VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
   VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
-  VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, {});
+  VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, false, {});
   EXPECT_TRUE(isa<VPUser>(&Recipe));
   VPRecipeBase *BaseR = &Recipe;
   EXPECT_TRUE(isa<VPUser>(BaseR));
@@ -1195,7 +1195,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
       new LoadInst(Int32, PoisonValue::get(Int32Ptr), "", false, Align(1));
   VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
   VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
-  VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, {});
+  VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, false, {});
   EXPECT_FALSE(Recipe.mayHaveSideEffects());
   EXPECT_TRUE(Recipe.mayReadFromMemory());
   EXPECT_FALSE(Recipe.mayWriteToMemory());
@@ -1209,7 +1209,8 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
   VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
   VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
   VPValue *StoredV = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3));
-  VPWidenStoreRecipe Recipe(*Store, Addr, StoredV, Mask, false, false, {});
+  VPWidenStoreRecipe Recipe(*Store, Addr, StoredV, Mask, false, false, false,
+                            {});
   EXPECT_TRUE(Recipe.mayHaveSideEffects());
   EXPECT_FALSE(Recipe.mayReadFromMemory());
   EXPECT_TRUE(Recipe.mayWriteToMemory());
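The updated tests only thread false through the extra parameter. A natural follow-up test (hypothetical, not part of this commit; it assumes the same fixture helpers, getPlan() and the LLVMContext C, used by the surrounding tests) would pin down the new flag itself and its survival through clone():

TEST_F(VPRecipeTest, StridedFlagRoundTrips) {
  VPlan &Plan = getPlan();
  IntegerType *Int32 = IntegerType::get(C, 32);
  auto *Int32Ptr = PointerType::get(C, 0);
  auto *Load =
      new LoadInst(Int32, PoisonValue::get(Int32Ptr), "", false, Align(1));
  VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
  VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
  // Consecutive = false, Reverse = false, Strided = true.
  VPWidenLoadRecipe Recipe(*Load, Addr, Mask, false, false, true, {});
  EXPECT_TRUE(Recipe.isStrided());
  // clone() must preserve the flag.
  VPWidenLoadRecipe *Clone = Recipe.clone();
  EXPECT_TRUE(Clone->isStrided());
  delete Clone;
  delete Load;
}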
