Commit a9bafe9

[VPlan] Split VPWidenMemoryInstructionRecipe (NFCI). (#87411)
This patch introduces a new VPWidenMemoryRecipe base class and distinct sub-classes to model widened loads and stores. This is a first step in an effort to simplify and modularize code generation for widened loads and stores, and to enable adding further, more specialized memory recipes.

PR: #87411
1 parent cbe148b commit a9bafe9
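As context for reading the diffs below: the split gives widened loads and stores a shared VPWidenMemoryRecipe base class, with VPWidenLoadRecipe and VPWidenStoreRecipe each providing their own execute(). The following is a minimal illustrative sketch only, not the VPlan.h change from this commit (that header is among the 13 touched files but is not reproduced on this page); the address, mask, stored-value, and debug-location operands of the real recipes go through VPlan's VPValue/VPRecipeBase machinery and are omitted here, and execute() takes a VPTransformState in the real code.

// Illustrative sketch of the new hierarchy; simplified, not the actual
// VPlan.h definitions from this commit.
#include "llvm/IR/Instructions.h"

namespace sketch {
using namespace llvm;

// Common state for widened memory accesses: the scalar instruction being
// widened plus its access-pattern flags.
class VPWidenMemoryRecipe {
protected:
  Instruction &Ingredient;
  bool Consecutive;
  bool Reverse;

  VPWidenMemoryRecipe(Instruction &I, bool Consecutive, bool Reverse)
      : Ingredient(I), Consecutive(Consecutive), Reverse(Reverse) {}

public:
  virtual ~VPWidenMemoryRecipe() = default;

  bool isConsecutive() const { return Consecutive; }
  bool isReverse() const { return Reverse; }

  // Each sub-class lowers itself to wide, masked, or gather/scatter IR.
  virtual void execute(/* VPTransformState &State */) = 0;
};

// A widened load produces a vector value (wide load, masked load, or gather).
class VPWidenLoadRecipe : public VPWidenMemoryRecipe {
public:
  VPWidenLoadRecipe(LoadInst &Load, bool Consecutive, bool Reverse)
      : VPWidenMemoryRecipe(Load, Consecutive, Reverse) {}
  void execute(/* VPTransformState &State */) override { /* see diff below */ }
};

// A widened store additionally consumes a stored value (wide store, masked
// store, or scatter); getStoredValue() in the diff retrieves that operand.
class VPWidenStoreRecipe : public VPWidenMemoryRecipe {
public:
  VPWidenStoreRecipe(StoreInst &Store, bool Consecutive, bool Reverse)
      : VPWidenMemoryRecipe(Store, Consecutive, Reverse) {}
  void execute(/* VPTransformState &State */) override { /* see diff below */ }
};

} // namespace sketch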

File tree

13 files changed (+249, -223 lines)


llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 90 additions & 101 deletions
@@ -545,11 +545,6 @@ class InnerLoopVectorizer {
   // Return true if any runtime check is added.
   bool areSafetyChecksAdded() { return AddedSafetyChecks; }

-  /// A type for vectorized values in the new loop. Each value from the
-  /// original loop, when vectorized, is represented by UF vector values in the
-  /// new unrolled loop, where UF is the unroll factor.
-  using VectorParts = SmallVector<Value *, 2>;
-
   /// A helper function to scalarize a single Instruction in the innermost loop.
   /// Generates a sequence of scalar instances for each lane between \p MinLane
   /// and \p MaxLane, times each part between \p MinPart and \p MaxPart,
@@ -8086,7 +8081,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB) {
   BlockMaskCache[BB] = BlockMask;
 }

-VPWidenMemoryInstructionRecipe *
+VPWidenMemoryRecipe *
 VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
                                   VFRange &Range) {
   assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
@@ -8131,12 +8126,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     Ptr = VectorPtr;
   }
   if (LoadInst *Load = dyn_cast<LoadInst>(I))
-    return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive,
-                                              Reverse, I->getDebugLoc());
+    return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
+                                 I->getDebugLoc());

   StoreInst *Store = cast<StoreInst>(I);
-  return new VPWidenMemoryInstructionRecipe(
-      *Store, Ptr, Operands[0], Mask, Consecutive, Reverse, I->getDebugLoc());
+  return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
+                                Reverse, I->getDebugLoc());
 }

 /// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
@@ -8775,13 +8770,12 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
   // for this VPlan, replace the Recipes widening its memory instructions with a
   // single VPInterleaveRecipe at its insertion point.
   for (const auto *IG : InterleaveGroups) {
-    auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
-        RecipeBuilder.getRecipe(IG->getInsertPos()));
+    auto *Recipe =
+        cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getInsertPos()));
     SmallVector<VPValue *, 4> StoredValues;
     for (unsigned i = 0; i < IG->getFactor(); ++i)
       if (auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(i))) {
-        auto *StoreR =
-            cast<VPWidenMemoryInstructionRecipe>(RecipeBuilder.getRecipe(SI));
+        auto *StoreR = cast<VPWidenStoreRecipe>(RecipeBuilder.getRecipe(SI));
         StoredValues.push_back(StoreR->getStoredValue());
       }

@@ -9368,92 +9362,27 @@ static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder,
   return Call;
 }

-void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
-  VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;
-
-  // Attempt to issue a wide load.
-  LoadInst *LI = dyn_cast<LoadInst>(&Ingredient);
-  StoreInst *SI = dyn_cast<StoreInst>(&Ingredient);
-
-  assert((LI || SI) && "Invalid Load/Store instruction");
-  assert((!SI || StoredValue) && "No stored value provided for widened store");
-  assert((!LI || !StoredValue) && "Stored value provided for widened load");
+void VPWidenLoadRecipe::execute(VPTransformState &State) {
+  auto *LI = cast<LoadInst>(&Ingredient);

   Type *ScalarDataTy = getLoadStoreType(&Ingredient);
-
   auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
-  bool CreateGatherScatter = !isConsecutive();
+  bool CreateGather = !isConsecutive();

   auto &Builder = State.Builder;
-  InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
-  bool isMaskRequired = getMask();
-  if (isMaskRequired) {
-    // Mask reversal is only needed for non-all-one (null) masks, as reverse of
-    // a null all-one mask is a null mask.
-    for (unsigned Part = 0; Part < State.UF; ++Part) {
-      Value *Mask = State.get(getMask(), Part);
+  State.setDebugLocFrom(getDebugLoc());
+  for (unsigned Part = 0; Part < State.UF; ++Part) {
+    Value *NewLI;
+    Value *Mask = nullptr;
+    if (auto *VPMask = getMask()) {
+      // Mask reversal is only needed for non-all-one (null) masks, as reverse
+      // of a null all-one mask is a null mask.
+      Mask = State.get(VPMask, Part);
       if (isReverse())
         Mask = Builder.CreateVectorReverse(Mask, "reverse");
-      BlockInMaskParts[Part] = Mask;
-    }
-  }
-
-  // Handle Stores:
-  if (SI) {
-    State.setDebugLocFrom(getDebugLoc());
-
-    for (unsigned Part = 0; Part < State.UF; ++Part) {
-      Instruction *NewSI = nullptr;
-      Value *StoredVal = State.get(StoredValue, Part);
-      // TODO: split this into several classes for better design.
-      if (State.EVL) {
-        assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
-                                "explicit vector length.");
-        assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
-                   VPInstruction::ExplicitVectorLength &&
-               "EVL must be VPInstruction::ExplicitVectorLength.");
-        Value *EVL = State.get(State.EVL, VPIteration(0, 0));
-        // If EVL is not nullptr, then EVL must be a valid value set during plan
-        // creation, possibly default value = whole vector register length. EVL
-        // is created only if TTI prefers predicated vectorization, thus if EVL
-        // is not nullptr it also implies preference for predicated
-        // vectorization.
-        // FIXME: Support reverse store after vp_reverse is added.
-        Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
-        NewSI = lowerStoreUsingVectorIntrinsics(
-            Builder, State.get(getAddr(), Part, !CreateGatherScatter),
-            StoredVal, CreateGatherScatter, MaskPart, EVL, Alignment);
-      } else if (CreateGatherScatter) {
-        Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
-        Value *VectorGep = State.get(getAddr(), Part);
-        NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
-                                            MaskPart);
-      } else {
-        if (isReverse()) {
-          // If we store to reverse consecutive memory locations, then we need
-          // to reverse the order of elements in the stored value.
-          StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
-          // We don't want to update the value in the map as it might be used in
-          // another expression. So don't call resetVectorValue(StoredVal).
-        }
-        auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
-        if (isMaskRequired)
-          NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
-                                            BlockInMaskParts[Part]);
-        else
-          NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
-      }
-      State.addMetadata(NewSI, SI);
     }
-    return;
-  }

-  // Handle loads.
-  assert(LI && "Must have a load instruction");
-  State.setDebugLocFrom(getDebugLoc());
-  for (unsigned Part = 0; Part < State.UF; ++Part) {
-    Value *NewLI;
     // TODO: split this into several classes for better design.
     if (State.EVL) {
       assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
@@ -9468,22 +9397,20 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
       // is not nullptr it also implies preference for predicated
       // vectorization.
       // FIXME: Support reverse loading after vp_reverse is added.
-      Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
       NewLI = lowerLoadUsingVectorIntrinsics(
-          Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter),
-          CreateGatherScatter, MaskPart, EVL, Alignment);
-    } else if (CreateGatherScatter) {
-      Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+          Builder, DataTy, State.get(getAddr(), Part, !CreateGather),
+          CreateGather, Mask, EVL, Alignment);
+    } else if (CreateGather) {
       Value *VectorGep = State.get(getAddr(), Part);
-      NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
+      NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, Mask,
                                          nullptr, "wide.masked.gather");
       State.addMetadata(NewLI, LI);
     } else {
       auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
-      if (isMaskRequired)
-        NewLI = Builder.CreateMaskedLoad(
-            DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
-            PoisonValue::get(DataTy), "wide.masked.load");
+      if (Mask)
+        NewLI = Builder.CreateMaskedLoad(DataTy, VecPtr, Alignment, Mask,
+                                         PoisonValue::get(DataTy),
+                                         "wide.masked.load");
       else
         NewLI =
             Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
@@ -9494,7 +9421,69 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
       NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
     }

-    State.set(getVPSingleValue(), NewLI, Part);
+    State.set(this, NewLI, Part);
+  }
+}
+
+void VPWidenStoreRecipe::execute(VPTransformState &State) {
+  auto *SI = cast<StoreInst>(&Ingredient);
+
+  VPValue *StoredVPValue = getStoredValue();
+  bool CreateScatter = !isConsecutive();
+  const Align Alignment = getLoadStoreAlignment(&Ingredient);
+
+  auto &Builder = State.Builder;
+  State.setDebugLocFrom(getDebugLoc());
+
+  for (unsigned Part = 0; Part < State.UF; ++Part) {
+    Instruction *NewSI = nullptr;
+    Value *Mask = nullptr;
+    if (auto *VPMask = getMask()) {
+      // Mask reversal is only needed for non-all-one (null) masks, as reverse
+      // of a null all-one mask is a null mask.
+      Mask = State.get(VPMask, Part);
+      if (isReverse())
+        Mask = Builder.CreateVectorReverse(Mask, "reverse");
+    }
+
+    Value *StoredVal = State.get(StoredVPValue, Part);
+    if (isReverse()) {
+      assert(!State.EVL && "reversing not yet implemented with EVL");
+      // If we store to reverse consecutive memory locations, then we need
+      // to reverse the order of elements in the stored value.
+      StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
+      // We don't want to update the value in the map as it might be used in
+      // another expression. So don't call resetVectorValue(StoredVal).
+    }
+    // TODO: split this into several classes for better design.
+    if (State.EVL) {
+      assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+                              "explicit vector length.");
+      assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
+                 VPInstruction::ExplicitVectorLength &&
+             "EVL must be VPInstruction::ExplicitVectorLength.");
+      Value *EVL = State.get(State.EVL, VPIteration(0, 0));
+      // If EVL is not nullptr, then EVL must be a valid value set during plan
+      // creation, possibly default value = whole vector register length. EVL
+      // is created only if TTI prefers predicated vectorization, thus if EVL
+      // is not nullptr it also implies preference for predicated
+      // vectorization.
+      // FIXME: Support reverse store after vp_reverse is added.
+      NewSI = lowerStoreUsingVectorIntrinsics(
+          Builder, State.get(getAddr(), Part, !CreateScatter), StoredVal,
+          CreateScatter, Mask, EVL, Alignment);
+    } else if (CreateScatter) {
+      Value *VectorGep = State.get(getAddr(), Part);
+      NewSI =
+          Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment, Mask);
+    } else {
+      auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
+      if (Mask)
+        NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, Mask);
+      else
+        NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
+    }
+    State.addMetadata(NewSI, SI);
   }
 }


llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 3 additions & 3 deletions
@@ -69,9 +69,9 @@ class VPRecipeBuilder {
   /// Check if the load or store instruction \p I should widened for \p
   /// Range.Start and potentially masked. Such instructions are handled by a
   /// recipe that takes an additional VPInstruction for the mask.
-  VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I,
-                                                   ArrayRef<VPValue *> Operands,
-                                                   VFRange &Range);
+  VPWidenMemoryRecipe *tryToWidenMemory(Instruction *I,
+                                        ArrayRef<VPValue *> Operands,
+                                        VFRange &Range);

   /// Check if an induction recipe should be constructed for \p Phi. If so build
   /// and return it. If not, return null.
