
Commit 2ec0e32

[VPlan] Split VPWidenMemoryInstructionRecipe (NFCI). (#87411)

1 parent f0724f0 commit 2ec0e32

22 files changed with 283 additions and 221 deletions
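
The VPlan recipe declarations that define the new classes live in VPlan.h, one of the 20 changed files not reproduced in this excerpt. As a rough orientation only, the following self-contained C++ sketch models the shape of the split that the two diffs below rely on: a common widened-memory base class that owns the address, mask, and consecutive/reverse flags, a load subclass, and a store subclass that additionally carries the stored operand. All names below use a "Sketch" suffix to make clear they are simplified stand-ins, not the actual LLVM declarations.

// Minimal sketch, not the real LLVM classes: one base for widened memory
// recipes, plus separate load and store subclasses.
struct ValueSketch {}; // stand-in for a VPValue-like operand

class WidenMemoryRecipeSketch {
protected:
  ValueSketch *Addr;
  ValueSketch *Mask; // null means the access is unmasked
  bool Consecutive;
  bool Reverse;

  WidenMemoryRecipeSketch(ValueSketch *Addr, ValueSketch *Mask,
                          bool Consecutive, bool Reverse)
      : Addr(Addr), Mask(Mask), Consecutive(Consecutive), Reverse(Reverse) {}

public:
  virtual ~WidenMemoryRecipeSketch() = default;
  ValueSketch *getAddr() const { return Addr; }
  ValueSketch *getMask() const { return Mask; }
  bool isConsecutive() const { return Consecutive; }
  bool isReverse() const { return Reverse; }
  // Each subclass emits only its own kind of wide memory operation.
  virtual void execute() = 0;
};

class WidenLoadRecipeSketch : public WidenMemoryRecipeSketch {
public:
  WidenLoadRecipeSketch(ValueSketch *Addr, ValueSketch *Mask, bool Consecutive,
                        bool Reverse)
      : WidenMemoryRecipeSketch(Addr, Mask, Consecutive, Reverse) {}
  void execute() override { /* emit a wide, gather, or reversed load */ }
};

class WidenStoreRecipeSketch : public WidenMemoryRecipeSketch {
  ValueSketch *StoredValue;

public:
  WidenStoreRecipeSketch(ValueSketch *StoredValue, ValueSketch *Addr,
                         ValueSketch *Mask, bool Consecutive, bool Reverse)
      : WidenMemoryRecipeSketch(Addr, Mask, Consecutive, Reverse),
        StoredValue(StoredValue) {}
  ValueSketch *getStoredValue() const { return StoredValue; }
  void execute() override { /* emit a wide, scatter, or reversed store */ }
};

The argument order of the store sketch's constructor (stored value before address) mirrors the new VPWidenStoreRecipe(*Store, Operands[0], Ptr, ...) call in the LoopVectorize.cpp hunk below.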

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 88 additions & 79 deletions
@@ -8095,7 +8095,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB) {
   BlockMaskCache[BB] = BlockMask;
 }

-VPWidenMemoryInstructionRecipe *
+VPWidenMemoryRecipe *
 VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
                                   VFRange &Range) {
   assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
@@ -8140,12 +8140,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     Ptr = VectorPtr;
   }
   if (LoadInst *Load = dyn_cast<LoadInst>(I))
-    return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive,
-                                              Reverse, I->getDebugLoc());
+    return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
+                                 I->getDebugLoc());

   StoreInst *Store = cast<StoreInst>(I);
-  return new VPWidenMemoryInstructionRecipe(
-      *Store, Ptr, Operands[0], Mask, Consecutive, Reverse, I->getDebugLoc());
+  return new VPWidenStoreRecipe(*Store, Operands[0], Ptr, Mask, Consecutive,
+                                Reverse, I->getDebugLoc());
 }

 /// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
@@ -8780,13 +8780,12 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
   // for this VPlan, replace the Recipes widening its memory instructions with a
   // single VPInterleaveRecipe at its insertion point.
   for (const auto *IG : InterleaveGroups) {
-    auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
-        RecipeBuilder.getRecipe(IG->getInsertPos()));
+    auto *Recipe =
+        cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getInsertPos()));
     SmallVector<VPValue *, 4> StoredValues;
     for (unsigned i = 0; i < IG->getFactor(); ++i)
       if (auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(i))) {
-        auto *StoreR =
-            cast<VPWidenMemoryInstructionRecipe>(RecipeBuilder.getRecipe(SI));
+        auto *StoreR = cast<VPWidenStoreRecipe>(RecipeBuilder.getRecipe(SI));
         StoredValues.push_back(StoreR->getStoredValue());
       }

@@ -9464,27 +9463,19 @@ static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder,
   return Call;
 }

-void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
-  VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;
-
+void VPWidenLoadRecipe::execute(VPTransformState &State) {
   // Attempt to issue a wide load.
-  LoadInst *LI = dyn_cast<LoadInst>(&Ingredient);
-  StoreInst *SI = dyn_cast<StoreInst>(&Ingredient);
-
-  assert((LI || SI) && "Invalid Load/Store instruction");
-  assert((!SI || StoredValue) && "No stored value provided for widened store");
-  assert((!LI || !StoredValue) && "Stored value provided for widened load");
+  auto *LI = cast<LoadInst>(&Ingredient);

   Type *ScalarDataTy = getLoadStoreType(&Ingredient);
-
   auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
-  bool CreateGatherScatter = !isConsecutive();
+  bool CreateGather = !isConsecutive();

   auto &Builder = State.Builder;
   InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
-  bool isMaskRequired = getMask();
-  if (isMaskRequired) {
+  bool IsMaskRequired = getMask();
+  if (IsMaskRequired) {
     // Mask reversal is only needed for non-all-one (null) masks, as reverse of
     // a null all-one mask is a null mask.
     for (unsigned Part = 0; Part < State.UF; ++Part) {
@@ -9495,56 +9486,6 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
     }
   }

-  // Handle Stores:
-  if (SI) {
-    State.setDebugLocFrom(getDebugLoc());
-
-    for (unsigned Part = 0; Part < State.UF; ++Part) {
-      Instruction *NewSI = nullptr;
-      Value *StoredVal = State.get(StoredValue, Part);
-      // TODO: split this into several classes for better design.
-      if (State.EVL) {
-        assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
-                                "explicit vector length.");
-        assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
-                   VPInstruction::ExplicitVectorLength &&
-               "EVL must be VPInstruction::ExplicitVectorLength.");
-        Value *EVL = State.get(State.EVL, VPIteration(0, 0));
-        // If EVL is not nullptr, then EVL must be a valid value set during plan
-        // creation, possibly default value = whole vector register length. EVL
-        // is created only if TTI prefers predicated vectorization, thus if EVL
-        // is not nullptr it also implies preference for predicated
-        // vectorization.
-        // FIXME: Support reverse store after vp_reverse is added.
-        Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
-        NewSI = lowerStoreUsingVectorIntrinsics(
-            Builder, State.get(getAddr(), Part, !CreateGatherScatter),
-            StoredVal, CreateGatherScatter, MaskPart, EVL, Alignment);
-      } else if (CreateGatherScatter) {
-        Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
-        Value *VectorGep = State.get(getAddr(), Part);
-        NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
-                                            MaskPart);
-      } else {
-        if (isReverse()) {
-          // If we store to reverse consecutive memory locations, then we need
-          // to reverse the order of elements in the stored value.
-          StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
-          // We don't want to update the value in the map as it might be used in
-          // another expression. So don't call resetVectorValue(StoredVal).
-        }
-        auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
-        if (isMaskRequired)
-          NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
-                                            BlockInMaskParts[Part]);
-        else
-          NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
-      }
-      State.addMetadata(NewSI, SI);
-    }
-    return;
-  }
-
   // Handle loads.
   assert(LI && "Must have a load instruction");
   State.setDebugLocFrom(getDebugLoc());
@@ -9564,19 +9505,19 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
       // is not nullptr it also implies preference for predicated
       // vectorization.
       // FIXME: Support reverse loading after vp_reverse is added.
-      Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+      Value *MaskPart = IsMaskRequired ? BlockInMaskParts[Part] : nullptr;
       NewLI = lowerLoadUsingVectorIntrinsics(
-          Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter),
-          CreateGatherScatter, MaskPart, EVL, Alignment);
-    } else if (CreateGatherScatter) {
-      Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+          Builder, DataTy, State.get(getAddr(), Part, !CreateGather),
+          CreateGather, MaskPart, EVL, Alignment);
+    } else if (CreateGather) {
+      Value *MaskPart = IsMaskRequired ? BlockInMaskParts[Part] : nullptr;
       Value *VectorGep = State.get(getAddr(), Part);
       NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
                                          nullptr, "wide.masked.gather");
       State.addMetadata(NewLI, LI);
     } else {
       auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
-      if (isMaskRequired)
+      if (IsMaskRequired)
         NewLI = Builder.CreateMaskedLoad(
             DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
             PoisonValue::get(DataTy), "wide.masked.load");
@@ -9590,7 +9531,75 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
       NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
     }

-    State.set(getVPSingleValue(), NewLI, Part);
+    State.set(this, NewLI, Part);
+  }
+}
+
+void VPWidenStoreRecipe::execute(VPTransformState &State) {
+  auto *SI = cast<StoreInst>(&Ingredient);
+
+  VPValue *StoredValue = getStoredValue();
+  bool CreateScatter = !isConsecutive();
+  const Align Alignment = getLoadStoreAlignment(&Ingredient);
+
+  auto &Builder = State.Builder;
+  InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
+  bool IsMaskRequired = getMask();
+  if (IsMaskRequired) {
+    // Mask reversal is only needed for non-all-one (null) masks, as reverse of
+    // a null all-one mask is a null mask.
+    for (unsigned Part = 0; Part < State.UF; ++Part) {
+      Value *Mask = State.get(getMask(), Part);
+      if (isReverse())
+        Mask = Builder.CreateVectorReverse(Mask, "reverse");
+      BlockInMaskParts[Part] = Mask;
+    }
+  }
+
+  State.setDebugLocFrom(getDebugLoc());
+
+  for (unsigned Part = 0; Part < State.UF; ++Part) {
+    Instruction *NewSI = nullptr;
+    Value *StoredVal = State.get(StoredValue, Part);
+    // TODO: split this into several classes for better design.
+    if (State.EVL) {
+      assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+                              "explicit vector length.");
+      assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
+                 VPInstruction::ExplicitVectorLength &&
+             "EVL must be VPInstruction::ExplicitVectorLength.");
+      Value *EVL = State.get(State.EVL, VPIteration(0, 0));
+      // If EVL is not nullptr, then EVL must be a valid value set during plan
+      // creation, possibly default value = whole vector register length. EVL
+      // is created only if TTI prefers predicated vectorization, thus if EVL
+      // is not nullptr it also implies preference for predicated
+      // vectorization.
+      // FIXME: Support reverse store after vp_reverse is added.
+      Value *MaskPart = IsMaskRequired ? BlockInMaskParts[Part] : nullptr;
+      NewSI = lowerStoreUsingVectorIntrinsics(
+          Builder, State.get(getAddr(), Part, !CreateScatter), StoredVal,
+          CreateScatter, MaskPart, EVL, Alignment);
+    } else if (CreateScatter) {
+      Value *MaskPart = IsMaskRequired ? BlockInMaskParts[Part] : nullptr;
+      Value *VectorGep = State.get(getAddr(), Part);
+      NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
+                                          MaskPart);
+    } else {
+      if (isReverse()) {
+        // If we store to reverse consecutive memory locations, then we need
+        // to reverse the order of elements in the stored value.
+        StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
+        // We don't want to update the value in the map as it might be used in
+        // another expression. So don't call resetVectorValue(StoredVal).
+      }
+      auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
+      if (IsMaskRequired)
+        NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
+                                          BlockInMaskParts[Part]);
+      else
+        NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
+    }
+    State.addMetadata(NewSI, SI);
   }
 }
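
Two details of the diff above are worth noting. The load path now publishes its result with State.set(this, NewLI, Part) instead of State.set(getVPSingleValue(), NewLI, Part), which suggests the widened-load recipe itself now acts as the value it defines, while the store recipe defines no result at all. Consequently, code that needs the stored operand dispatches on the concrete recipe type, as the interleave-group hunk does with cast<VPWidenStoreRecipe>. The short sketch below illustrates that dispatch; it reuses the "Sketch" classes defined near the top of this page, substitutes plain dynamic_cast for LLVM's isa/cast machinery, and the helper name is invented for the example.

#include <cassert>

// Hypothetical helper built on the sketch classes above: return the stored
// operand if the recipe is a store, or null if it is a load.
ValueSketch *getStoredOperandOrNull(WidenMemoryRecipeSketch *R) {
  if (auto *StoreR = dynamic_cast<WidenStoreRecipeSketch *>(R))
    return StoreR->getStoredValue();
  return nullptr; // a load defines a result instead of consuming one
}

int main() {
  ValueSketch Addr, Data;
  WidenStoreRecipeSketch Store(&Data, &Addr, /*Mask=*/nullptr,
                               /*Consecutive=*/true, /*Reverse=*/false);
  WidenLoadRecipeSketch Load(&Addr, /*Mask=*/nullptr, /*Consecutive=*/true,
                             /*Reverse=*/false);
  assert(getStoredOperandOrNull(&Store) == &Data);
  assert(getStoredOperandOrNull(&Load) == nullptr);
  return 0;
}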

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 3 additions & 3 deletions
@@ -69,9 +69,9 @@ class VPRecipeBuilder {
   /// Check if the load or store instruction \p I should widened for \p
   /// Range.Start and potentially masked. Such instructions are handled by a
   /// recipe that takes an additional VPInstruction for the mask.
-  VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I,
-                                                   ArrayRef<VPValue *> Operands,
-                                                   VFRange &Range);
+  VPWidenMemoryRecipe *tryToWidenMemory(Instruction *I,
+                                        ArrayRef<VPValue *> Operands,
+                                        VFRange &Range);

   /// Check if an induction recipe should be constructed for \p Phi. If so build
   /// and return it. If not, return null.
