
Commit 4fcaa59

Step 3: Patch the implementation of load/store recipes
1 parent 1ae1c1f commit 4fcaa59

5 files changed: +118 −39 lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 2 deletions
@@ -8382,12 +8382,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     Ptr = VectorPtr;
   }
   if (LoadInst *Load = dyn_cast<LoadInst>(I))
-    return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
+    return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse, false,
                                  I->getDebugLoc());
 
   StoreInst *Store = cast<StoreInst>(I);
   return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
-                                Reverse, I->getDebugLoc());
+                                Reverse, false, I->getDebugLoc());
 }
 
 /// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 19 additions & 10 deletions
@@ -2560,6 +2560,9 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
   /// Whether the consecutive accessed addresses are in reverse order.
   bool Reverse;
 
+  /// Whether the accessed addresses are evenly spaced apart by a fixed stride.
+  bool Strided;
+
   /// Whether the memory access is masked.
   bool IsMasked = false;
 
@@ -2573,9 +2576,9 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
 
   VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
                       std::initializer_list<VPValue *> Operands,
-                      bool Consecutive, bool Reverse, DebugLoc DL)
+                      bool Consecutive, bool Reverse, bool Strided, DebugLoc DL)
       : VPRecipeBase(SC, Operands, DL), Ingredient(I), Consecutive(Consecutive),
-        Reverse(Reverse) {
+        Reverse(Reverse), Strided(Strided) {
     assert((Consecutive || !Reverse) && "Reverse implies consecutive");
   }
 
@@ -2603,6 +2606,10 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
   /// order.
   bool isReverse() const { return Reverse; }
 
+  /// Return whether the accessed addresses are evenly spaced apart by a fixed
+  /// stride.
+  bool isStrided() const { return Strided; }
+
   /// Return the address accessed by this recipe.
   VPValue *getAddr() const { return getOperand(0); }
 
@@ -2632,16 +2639,16 @@ class VPWidenMemoryRecipe : public VPRecipeBase {
 /// optional mask.
 struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
   VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
-                    bool Consecutive, bool Reverse, DebugLoc DL)
+                    bool Consecutive, bool Reverse, bool Strided, DebugLoc DL)
       : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
-                            Reverse, DL),
+                            Reverse, Strided, DL),
         VPValue(this, &Load) {
     setMask(Mask);
   }
 
   VPWidenLoadRecipe *clone() override {
     return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
-                                 getMask(), Consecutive, Reverse,
+                                 getMask(), Consecutive, Reverse, Strided,
                                  getDebugLoc());
   }
 
@@ -2673,7 +2680,7 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
   VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
       : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
                             {L.getAddr(), &EVL}, L.isConsecutive(),
-                            L.isReverse(), L.getDebugLoc()),
+                            L.isReverse(), L.isStrided(), L.getDebugLoc()),
         VPValue(this, &getIngredient()) {
     setMask(Mask);
   }
@@ -2710,16 +2717,17 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
 /// to store to and an optional mask.
 struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
   VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
-                     VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
+                     VPValue *Mask, bool Consecutive, bool Reverse,
+                     bool Strided, DebugLoc DL)
       : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
-                            Consecutive, Reverse, DL) {
+                            Consecutive, Reverse, Strided, DL) {
     setMask(Mask);
   }
 
   VPWidenStoreRecipe *clone() override {
     return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
                                   getStoredValue(), getMask(), Consecutive,
-                                  Reverse, getDebugLoc());
+                                  Reverse, Strided, getDebugLoc());
   }
 
   VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
@@ -2753,7 +2761,8 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
   VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
       : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
                             {S.getAddr(), S.getStoredValue(), &EVL},
-                            S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
+                            S.isConsecutive(), S.isReverse(), S.isStrided(),
+                            S.getDebugLoc()) {
     setMask(Mask);
   }

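Taken together, the new flag gives VPWidenMemoryRecipe a three-way classification of widened memory accesses: consecutive lanes collapse into one wide load/store, strided lanes sit a fixed byte distance apart, and anything else falls back to a gather/scatter. As a minimal LLVM IR sketch (assuming a <vscale x 4 x i32> data type; the function and value names are illustrative, not from this patch), the three load shapes look like:

define <vscale x 4 x i32> @load_shapes(ptr %p, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl) {
  ; Consecutive: one wide load covers every lane.
  %consec = load <vscale x 4 x i32>, ptr %p, align 4
  ; Strided (what this patch wires up): lanes are a fixed stride apart;
  ; -4 bytes walks an i32 array in unit-reverse order.
  %strided = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i64(ptr align 4 %p, i64 -4, <vscale x 4 x i1> %m, i32 %evl)
  ; Neither: a gather over an arbitrary vector of pointers.
  %gather = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %m, <vscale x 4 x i32> poison)
  ret <vscale x 4 x i32> %strided
}

declare <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i64(ptr, i64, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)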
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 91 additions & 22 deletions
@@ -2561,10 +2561,15 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
     const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
     assert(!Reverse &&
            "Inconsecutive memory access should not have the order.");
-    return Ctx.TTI.getAddressComputationCost(Ty) +
-           Ctx.TTI.getGatherScatterOpCost(Ingredient.getOpcode(), Ty, Ptr,
-                                          IsMasked, Alignment, Ctx.CostKind,
-                                          &Ingredient);
+    if (Strided)
+      return Ctx.TTI.getStridedMemoryOpCost(Ingredient.getOpcode(), Ty, Ptr,
+                                            IsMasked, Alignment, Ctx.CostKind,
+                                            &Ingredient);
+    else
+      return Ctx.TTI.getAddressComputationCost(Ty) +
+             Ctx.TTI.getGatherScatterOpCost(Ingredient.getOpcode(), Ty, Ptr,
+                                            IsMasked, Alignment, Ctx.CostKind,
+                                            &Ingredient);
   }
 
   InstructionCost Cost = 0;
@@ -2591,11 +2596,13 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
   Type *ScalarDataTy = getLoadStoreType(&Ingredient);
   auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
-  bool CreateGather = !isConsecutive();
+  bool CreateGather = !isConsecutive() && !isStrided();
 
   auto &Builder = State.Builder;
   State.setDebugLocFrom(getDebugLoc());
-  Value *Mask = nullptr;
+  Value *Mask = isStrided()
+                    ? Builder.CreateVectorSplat(State.VF, Builder.getTrue())
+                    : nullptr;
   if (auto *VPMask = getMask()) {
     // Mask reversal is only needed for non-all-one (null) masks, as reverse
     // of a null all-one mask is a null mask.
@@ -2610,9 +2617,25 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
     NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
                                        "wide.masked.gather");
   } else if (Mask) {
-    NewLI =
-        Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
-                                 PoisonValue::get(DataTy), "wide.masked.load");
+    if (isStrided()) {
+      const DataLayout &DL = LI->getDataLayout();
+      auto *PtrTy = Addr->getType();
+      auto *StrideTy = DL.getIndexType(PtrTy);
+      // TODO: Support non-unit-reverse strided accesses.
+      auto *StrideVal =
+          ConstantInt::get(StrideTy, -1 * DL.getTypeAllocSize(ScalarDataTy));
+      Value *RuntimeVF =
+          getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
+      NewLI = Builder.CreateIntrinsic(
+          Intrinsic::experimental_vp_strided_load, {DataTy, PtrTy, StrideTy},
+          {Addr, StrideVal, Mask, RuntimeVF}, nullptr, "wide.strided.load");
+      cast<CallInst>(NewLI)->addParamAttr(
+          0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
+    } else {
+      NewLI = Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
+                                       PoisonValue::get(DataTy),
+                                       "wide.masked.load");
+    }
   } else {
     NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
   }
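For the strided branch above, the emitted IR comes out roughly as follows — a sketch assuming an i32 element type and <vscale x 4> lanes, with illustrative names. On this non-EVL path the intrinsic's EVL operand is the full runtime VF from getRuntimeVF, so lane masking is carried entirely by the mask operand (an all-true splat when the recipe has no mask):

define <vscale x 4 x i32> @reverse_strided_load(ptr %addr, <vscale x 4 x i1> %mask) {
  ; getRuntimeVF materializes vscale * 4 for a <vscale x 4 x i32> type.
  %vscale = call i32 @llvm.vscale.i32()
  %vl = shl i32 %vscale, 2
  ; Stride -4: lane i reads addr - 4*i, the unit-reverse case handled
  ; here; the alignment lands as a call-site parameter attribute.
  %v = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i64(ptr align 4 %addr, i64 -4, <vscale x 4 x i1> %mask, i32 %vl)
  ret <vscale x 4 x i32> %v
}

declare i32 @llvm.vscale.i32()
declare <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i64(ptr, i64, <vscale x 4 x i1>, i32)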
@@ -2650,7 +2673,7 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   Type *ScalarDataTy = getLoadStoreType(&Ingredient);
   auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
-  bool CreateGather = !isConsecutive();
+  bool CreateGather = !isConsecutive() && !isStrided();
 
   auto &Builder = State.Builder;
   State.setDebugLocFrom(getDebugLoc());
@@ -2670,6 +2693,16 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
     NewLI =
         Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
                                 nullptr, "wide.masked.gather");
+  } else if (isStrided()) {
+    const DataLayout &DL = LI->getDataLayout();
+    auto *PtrTy = Addr->getType();
+    auto *StrideTy = DL.getIndexType(PtrTy);
+    // TODO: Support non-unit-reverse strided accesses.
+    auto *StrideVal =
+        ConstantInt::get(StrideTy, -1 * DL.getTypeAllocSize(ScalarDataTy));
+    NewLI = Builder.CreateIntrinsic(
+        Intrinsic::experimental_vp_strided_load, {DataTy, PtrTy, StrideTy},
+        {Addr, StrideVal, Mask, EVL}, nullptr, "wide.strided.load");
   } else {
     VectorBuilder VBuilder(Builder);
     VBuilder.setEVL(EVL).setMask(Mask);
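The EVL flavor emits the same intrinsic but passes the loop's explicit vector length through directly, so tail lanes are disabled by the EVL operand rather than by a computed runtime VF (sketch under the same illustrative assumptions as above):

define <vscale x 4 x i32> @reverse_strided_load_evl(ptr %addr, <vscale x 4 x i1> %mask, i32 %evl) {
  ; Lanes at and beyond %evl are inactive, so no separate runtime-VF
  ; computation is needed on this path.
  %v = call <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i64(ptr %addr, i64 -4, <vscale x 4 x i1> %mask, i32 %evl)
  ret <vscale x 4 x i32> %v
}

declare <vscale x 4 x i32> @llvm.experimental.vp.strided.load.nxv4i32.p0.i64(ptr, i64, <vscale x 4 x i1>, i32)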
@@ -2724,13 +2757,15 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
   auto *SI = cast<StoreInst>(&Ingredient);
 
   VPValue *StoredVPValue = getStoredValue();
-  bool CreateScatter = !isConsecutive();
+  bool CreateScatter = !isConsecutive() && !isStrided();
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
 
   auto &Builder = State.Builder;
   State.setDebugLocFrom(getDebugLoc());
 
-  Value *Mask = nullptr;
+  Value *Mask = isStrided()
+                    ? Builder.CreateVectorSplat(State.VF, Builder.getTrue())
+                    : nullptr;
   if (auto *VPMask = getMask()) {
     // Mask reversal is only needed for non-all-one (null) masks, as reverse
     // of a null all-one mask is a null mask.
@@ -2749,12 +2784,32 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
   }
   Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
   Instruction *NewSI = nullptr;
-  if (CreateScatter)
+  if (CreateScatter) {
     NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
-  else if (Mask)
-    NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
-  else
+  } else if (Mask) {
+    if (isStrided()) {
+      const DataLayout &DL = SI->getDataLayout();
+      auto *StoredVecTy = cast<VectorType>(StoredVal->getType());
+      Type *StoredEltTy = StoredVecTy->getElementType();
+      auto *PtrTy = Addr->getType();
+      auto *StrideTy = DL.getIndexType(PtrTy);
+      // TODO: Support non-unit-reverse strided accesses.
+      auto *StrideVal =
+          ConstantInt::get(StrideTy, -1 * DL.getTypeAllocSize(StoredEltTy));
+      Value *RuntimeVF =
+          getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
+      NewSI = Builder.CreateIntrinsic(
+          Intrinsic::experimental_vp_strided_store,
+          {StoredVecTy, PtrTy, StrideTy},
+          {StoredVal, Addr, StrideVal, Mask, RuntimeVF});
+      cast<CallInst>(NewSI)->addParamAttr(
+          1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
+    } else {
+      NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
+    }
+  } else {
     NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
+  }
   State.addMetadata(NewSI, SI);
 }

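The store side mirrors the load: the stored value comes first, the pointer second, and addParamAttr(1, ...) pins the alignment to the pointer operand. A hedged sketch of the result, again with illustrative names and a <vscale x 4 x i32> value:

define void @reverse_strided_store(ptr %addr, <vscale x 4 x i32> %val, <vscale x 4 x i1> %mask) {
  ; The runtime VF again serves as the EVL operand on the non-EVL path.
  %vscale = call i32 @llvm.vscale.i32()
  %vl = shl i32 %vscale, 2
  call void @llvm.experimental.vp.strided.store.nxv4i32.p0.i64(<vscale x 4 x i32> %val, ptr align 4 %addr, i64 -4, <vscale x 4 x i1> %mask, i32 %vl)
  ret void
}

declare i32 @llvm.vscale.i32()
declare void @llvm.experimental.vp.strided.store.nxv4i32.p0.i64(<vscale x 4 x i32>, ptr, i64, <vscale x 4 x i1>, i32)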
@@ -2770,7 +2825,7 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
   auto *SI = cast<StoreInst>(&Ingredient);
 
   VPValue *StoredValue = getStoredValue();
-  bool CreateScatter = !isConsecutive();
+  bool CreateScatter = !isConsecutive() && !isStrided();
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
 
   auto &Builder = State.Builder;
@@ -2795,11 +2850,25 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
                                     Intrinsic::vp_scatter,
                                     {StoredVal, Addr, Mask, EVL});
   } else {
-    VectorBuilder VBuilder(Builder);
-    VBuilder.setEVL(EVL).setMask(Mask);
-    NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
-        Instruction::Store, Type::getVoidTy(EVL->getContext()),
-        {StoredVal, Addr}));
+    if (isStrided()) {
+      const DataLayout &DL = SI->getDataLayout();
+      auto *StoredVecTy = cast<VectorType>(StoredVal->getType());
+      Type *StoredEltTy = StoredVecTy->getElementType();
+      auto *PtrTy = Addr->getType();
+      auto *StrideTy = DL.getIndexType(PtrTy);
+      // TODO: Support non-unit-reverse strided accesses.
+      auto *StrideVal =
+          ConstantInt::get(StrideTy, -1 * DL.getTypeAllocSize(StoredEltTy));
+      NewSI = Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_store,
+                                      {StoredVecTy, PtrTy, StrideTy},
+                                      {StoredVal, Addr, StrideVal, Mask, EVL});
+    } else {
+      VectorBuilder VBuilder(Builder);
+      VBuilder.setEVL(EVL).setMask(Mask);
+      NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
+          Instruction::Store, Type::getVoidTy(EVL->getContext()),
+          {StoredVal, Addr}));
+    }
   }
   NewSI->addParamAttr(
       1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
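And the EVL store variant, where the alignment attribute is attached by the shared NewSI->addParamAttr call just below the branch (same illustrative assumptions):

define void @reverse_strided_store_evl(ptr %addr, <vscale x 4 x i32> %val, <vscale x 4 x i1> %mask, i32 %evl) {
  call void @llvm.experimental.vp.strided.store.nxv4i32.p0.i64(<vscale x 4 x i32> %val, ptr align 4 %addr, i64 -4, <vscale x 4 x i1> %mask, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.nxv4i32.p0.i64(<vscale x 4 x i32>, ptr, i64, <vscale x 4 x i1>, i32)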

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 2 additions & 2 deletions
@@ -73,13 +73,13 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
       if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
         NewRecipe = new VPWidenLoadRecipe(
             *Load, Ingredient.getOperand(0), nullptr /*Mask*/,
-            false /*Consecutive*/, false /*Reverse*/,
+            false /*Consecutive*/, false /*Reverse*/, false /*Strided*/,
             Ingredient.getDebugLoc());
       } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
         NewRecipe = new VPWidenStoreRecipe(
             *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
             nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/,
-            Ingredient.getDebugLoc());
+            false /*Strided*/, Ingredient.getDebugLoc());
       } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
         NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
       } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {

llvm/unittests/Transforms/Vectorize/VPlanTest.cpp

Lines changed: 4 additions & 3 deletions
@@ -1084,7 +1084,7 @@ TEST_F(VPRecipeTest, CastVPWidenMemoryRecipeToVPUserAndVPDef) {
       new LoadInst(Int32, PoisonValue::get(Int32Ptr), "", false, Align(1));
   VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
   VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
-  VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, {});
+  VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, false, {});
   EXPECT_TRUE(isa<VPUser>(&Recipe));
   VPRecipeBase *BaseR = &Recipe;
   EXPECT_TRUE(isa<VPUser>(BaseR));
@@ -1195,7 +1195,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
       new LoadInst(Int32, PoisonValue::get(Int32Ptr), "", false, Align(1));
   VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
   VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
-  VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, {});
+  VPWidenLoadRecipe Recipe(*Load, Addr, Mask, true, false, false, {});
   EXPECT_FALSE(Recipe.mayHaveSideEffects());
   EXPECT_TRUE(Recipe.mayReadFromMemory());
   EXPECT_FALSE(Recipe.mayWriteToMemory());
@@ -1209,7 +1209,8 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
   VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
   VPValue *Addr = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
   VPValue *StoredV = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3));
-  VPWidenStoreRecipe Recipe(*Store, Addr, StoredV, Mask, false, false, {});
+  VPWidenStoreRecipe Recipe(*Store, Addr, StoredV, Mask, false, false, false,
+                            {});
   EXPECT_TRUE(Recipe.mayHaveSideEffects());
   EXPECT_FALSE(Recipe.mayReadFromMemory());
   EXPECT_TRUE(Recipe.mayWriteToMemory());
