Skip to content

Commit 2fd7844

Browse files
committed
[VPlan] Manage noalias/alias_scope metadata in VPlan.
Use VPIRMetadata added in llvm#135272 to also manage no-alias metadata added by versioning. Note that this means we have to build the no-alias metadata up-front once. If it is not used, it will be discarded automatically.
1 parent 38bf1af commit 2fd7844

File tree

9 files changed

+69
-96
lines changed

9 files changed

+69
-96
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class LoopVectorizationLegality;
3636
class LoopVectorizationCostModel;
3737
class PredicatedScalarEvolution;
3838
class LoopVectorizeHints;
39+
class LoopVersioning;
3940
class OptimizationRemarkEmitter;
4041
class TargetTransformInfo;
4142
class TargetLibraryInfo;
@@ -515,7 +516,7 @@ class LoopVectorizationPlanner {
515516
/// returned VPlan is valid for. If no VPlan can be built for the input range,
516517
/// set the largest included VF to the maximum VF for which no plan could be
517518
/// built.
518-
VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range);
519+
VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range, LoopVersioning *LVer);
519520

520521
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
521522
/// according to the information gathered by Legal when it checked if it is

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 35 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2371,7 +2371,7 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
23712371
InputLane = VPLane::getFirstLane();
23722372
Cloned->setOperand(I.index(), State.get(Operand, InputLane));
23732373
}
2374-
State.addNewMetadata(Cloned, Instr);
2374+
RepRecipe->applyMetadata(Cloned);
23752375

23762376
// Place the cloned scalar in the new loop.
23772377
State.Builder.Insert(Cloned);
@@ -7989,24 +7989,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
79897989
if (VectorizingEpilogue)
79907990
VPlanTransforms::removeDeadRecipes(BestVPlan);
79917991

7992-
// Only use noalias metadata when using memory checks guaranteeing no overlap
7993-
// across all iterations.
7994-
const LoopAccessInfo *LAI = ILV.Legal->getLAI();
7995-
std::unique_ptr<LoopVersioning> LVer = nullptr;
7996-
if (LAI && !LAI->getRuntimePointerChecking()->getChecks().empty() &&
7997-
!LAI->getRuntimePointerChecking()->getDiffChecks()) {
7998-
7999-
// We currently don't use LoopVersioning for the actual loop cloning but we
8000-
// still use it to add the noalias metadata.
8001-
// TODO: Find a better way to re-use LoopVersioning functionality to add
8002-
// metadata.
8003-
LVer = std::make_unique<LoopVersioning>(
8004-
*LAI, LAI->getRuntimePointerChecking()->getChecks(), OrigLoop, LI, DT,
8005-
PSE.getSE());
8006-
State.LVer = &*LVer;
8007-
State.LVer->prepareNoAliasMetadata();
8008-
}
8009-
80107992
ILV.printDebugTracesAtStart();
80117993

80127994
//===------------------------------------------------===//
@@ -8597,15 +8579,14 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
85978579
Builder.insert(VectorPtr);
85988580
Ptr = VectorPtr;
85998581
}
8582+
auto Metadata = getMetadataToPropagate(I);
86008583
if (LoadInst *Load = dyn_cast<LoadInst>(I))
86018584
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
8602-
getMetadataToPropagate(Load),
8603-
I->getDebugLoc());
8585+
Metadata, I->getDebugLoc());
86048586

86058587
StoreInst *Store = cast<StoreInst>(I);
86068588
return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
8607-
Reverse, getMetadataToPropagate(Store),
8608-
I->getDebugLoc());
8589+
Reverse, Metadata, I->getDebugLoc());
86098590
}
86108591

86118592
/// Creates a VPWidenIntOrFpInductionRecipe for \p Phi. If needed, it will also
@@ -8985,8 +8966,9 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
89858966
assert((Range.Start.isScalar() || !IsUniform || !IsPredicated ||
89868967
(Range.Start.isScalable() && isa<IntrinsicInst>(I))) &&
89878968
"Should not predicate a uniform recipe");
8988-
auto *Recipe = new VPReplicateRecipe(
8989-
I, make_range(Operands.begin(), Operands.end()), IsUniform, BlockInMask);
8969+
auto *Recipe =
8970+
new VPReplicateRecipe(I, make_range(Operands.begin(), Operands.end()),
8971+
IsUniform, BlockInMask, getMetadataToPropagate(I));
89908972
return Recipe;
89918973
}
89928974

@@ -9104,9 +9086,16 @@ bool VPRecipeBuilder::getScaledReductions(
91049086
}
91059087

91069088
SmallVector<std::pair<unsigned, MDNode *>>
9107-
VPRecipeBuilder::getMetadataToPropagate(Instruction *I) {
9089+
VPRecipeBuilder::getMetadataToPropagate(Instruction *I) const {
91089090
SmallVector<std::pair<unsigned, MDNode *>> Metadata;
91099091
::getMetadataToPropagate(I, Metadata);
9092+
if (LVer && isa<LoadInst, StoreInst>(I)) {
9093+
const auto &[AliasScopeMD, NoAliasMD] = LVer->getNoAliasMetadataFor(I);
9094+
if (AliasScopeMD)
9095+
Metadata.emplace_back(LLVMContext::MD_alias_scope, AliasScopeMD);
9096+
if (NoAliasMD)
9097+
Metadata.emplace_back(LLVMContext::MD_noalias, NoAliasMD);
9098+
}
91109099
return Metadata;
91119100
}
91129101

@@ -9239,10 +9228,22 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
92399228
ElementCount MaxVF) {
92409229
assert(OrigLoop->isInnermost() && "Inner loop expected.");
92419230

9231+
// Only use noalias metadata when using memory checks guaranteeing no overlap
9232+
// across all iterations.
9233+
const LoopAccessInfo *LAI = Legal->getLAI();
9234+
std::unique_ptr<LoopVersioning> LVer = nullptr;
9235+
if (LAI && !LAI->getRuntimePointerChecking()->getChecks().empty() &&
9236+
!LAI->getRuntimePointerChecking()->getDiffChecks()) {
9237+
LVer = std::make_unique<LoopVersioning>(
9238+
*LAI, LAI->getRuntimePointerChecking()->getChecks(), OrigLoop, LI, DT,
9239+
PSE.getSE());
9240+
LVer->prepareNoAliasMetadata();
9241+
}
9242+
92429243
auto MaxVFTimes2 = MaxVF * 2;
92439244
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
92449245
VFRange SubRange = {VF, MaxVFTimes2};
9245-
if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange)) {
9246+
if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, LVer.get())) {
92469247
bool HasScalarVF = Plan->hasScalarVFOnly();
92479248
// Now optimize the initial VPlan.
92489249
if (!HasScalarVF)
@@ -9550,7 +9551,8 @@ static void addExitUsersForFirstOrderRecurrences(
95509551
}
95519552

95529553
VPlanPtr
9553-
LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9554+
LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
9555+
LoopVersioning *LVer) {
95549556

95559557
using namespace llvm::VPlanPatternMatch;
95569558
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
@@ -9596,7 +9598,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
95969598
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL);
95979599

95989600
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
9599-
Builder);
9601+
Builder, LVer);
96009602

96019603
// ---------------------------------------------------------------------------
96029604
// Pre-construction: record ingredients whose recipes we'll need to further
@@ -9710,8 +9712,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
97109712
Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) {
97119713
// Only create recipe for the final invariant store of the reduction.
97129714
if (Legal->isInvariantStoreOfReduction(SI)) {
9713-
auto *Recipe =
9714-
new VPReplicateRecipe(SI, R.operands(), true /* IsUniform */);
9715+
auto *Recipe = new VPReplicateRecipe(
9716+
SI, R.operands(), true /* IsUniform */, /*Mask*/ nullptr,
9717+
RecipeBuilder.getMetadataToPropagate(SI));
97159718
Recipe->insertBefore(*MiddleVPBB, MBIP);
97169719
}
97179720
R.eraseFromParent();
@@ -9897,7 +9900,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
98979900
// Collect mapping of IR header phis to header phi recipes, to be used in
98989901
// addScalarResumePhis.
98999902
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
9900-
Builder);
9903+
Builder, nullptr);
99019904
for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
99029905
if (isa<VPCanonicalIVPHIRecipe>(&R))
99039906
continue;

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ class VPRecipeBuilder {
9090
/// A mapping of partial reduction exit instructions to their scaling factor.
9191
DenseMap<const Instruction *, unsigned> ScaledReductionMap;
9292

93+
/// Loop versioning instance for getting noalias metadata guaranteed by
94+
/// runtime checks.
95+
LoopVersioning *LVer;
96+
9397
/// Check if \p I can be widened at the start of \p Range and possibly
9498
/// decrease the range such that the returned value holds for the entire \p
9599
/// Range. The function should not be called for memory instructions or calls.
@@ -155,9 +159,10 @@ class VPRecipeBuilder {
155159
const TargetTransformInfo *TTI,
156160
LoopVectorizationLegality *Legal,
157161
LoopVectorizationCostModel &CM,
158-
PredicatedScalarEvolution &PSE, VPBuilder &Builder)
162+
PredicatedScalarEvolution &PSE, VPBuilder &Builder,
163+
LoopVersioning *LVer)
159164
: Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
160-
CM(CM), PSE(PSE), Builder(Builder) {}
165+
CM(CM), PSE(PSE), Builder(Builder), LVer(LVer) {}
161166

162167
std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
163168
auto It = ScaledReductionMap.find(ExitInst);
@@ -236,8 +241,8 @@ class VPRecipeBuilder {
236241

237242
/// Returns the metadata that can be preserved from the original instruction
238243
/// \p I, including noalias metadata guaranteed by runtime checks.
239-
static SmallVector<std::pair<unsigned, MDNode *>>
240-
getMetadataToPropagate(Instruction *I);
244+
SmallVector<std::pair<unsigned, MDNode *>>
245+
getMetadataToPropagate(Instruction *I) const;
241246
};
242247
} // end namespace llvm
243248

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,8 @@ VPTransformState::VPTransformState(const TargetTransformInfo *TTI,
220220
InnerLoopVectorizer *ILV, VPlan *Plan,
221221
Loop *CurrentParentLoop, Type *CanonicalIVTy)
222222
: TTI(TTI), VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
223-
CurrentParentLoop(CurrentParentLoop), LVer(nullptr),
224-
TypeAnalysis(CanonicalIVTy), VPDT(*Plan) {}
223+
CurrentParentLoop(CurrentParentLoop), TypeAnalysis(CanonicalIVTy),
224+
VPDT(*Plan) {}
225225

226226
Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) {
227227
if (Def->isLiveIn())
@@ -355,15 +355,6 @@ BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
355355
return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
356356
}
357357

358-
void VPTransformState::addNewMetadata(Value *To, const Instruction *Orig) {
359-
360-
// If the loop was versioned with memchecks, add the corresponding no-alias
361-
// metadata.
362-
Instruction *ToI = dyn_cast<Instruction>(To);
363-
if (ToI && LVer && isa<LoadInst, StoreInst>(Orig))
364-
LVer->annotateInstWithNoAlias(ToI, Orig);
365-
}
366-
367358
void VPTransformState::setDebugLocFrom(DebugLoc DL) {
368359
const DILocation *DIL = DL;
369360
// When a FSDiscriminator is enabled, we don't need to add the multiply

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2469,7 +2469,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
24692469
/// copies of the original scalar type, one per lane, instead of producing a
24702470
/// single copy of widened type for all lanes. If the instruction is known to be
24712471
/// uniform only one copy, per lane zero, will be generated.
2472-
class VPReplicateRecipe : public VPRecipeWithIRFlags {
2472+
class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
24732473
/// Indicator if only a single replica per lane is needed.
24742474
bool IsUniform;
24752475

@@ -2479,19 +2479,20 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags {
24792479
public:
24802480
template <typename IterT>
24812481
VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
2482-
bool IsUniform, VPValue *Mask = nullptr)
2482+
bool IsUniform, VPValue *Mask = nullptr,
2483+
ArrayRef<std::pair<unsigned, MDNode *>> Metadata = {})
24832484
: VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2484-
IsUniform(IsUniform), IsPredicated(Mask) {
2485+
VPIRMetadata(Metadata), IsUniform(IsUniform), IsPredicated(Mask) {
24852486
if (Mask)
24862487
addOperand(Mask);
24872488
}
24882489

24892490
~VPReplicateRecipe() override = default;
24902491

24912492
VPReplicateRecipe *clone() override {
2492-
auto *Copy =
2493-
new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
2494-
isPredicated() ? getMask() : nullptr);
2493+
auto *Copy = new VPReplicateRecipe(
2494+
getUnderlyingInstr(), operands(), IsUniform,
2495+
isPredicated() ? getMask() : nullptr, getMetadata());
24952496
Copy->transferFlags(*this);
24962497
return Copy;
24972498
}

llvm/lib/Transforms/Vectorize/VPlanHelpers.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ class VPBasicBlock;
3838
class VPRegionBlock;
3939
class VPlan;
4040
class Value;
41-
class LoopVersioning;
4241

4342
/// Returns a calculation for the total number of elements for a given \p VF.
4443
/// For fixed width vectors this value is a constant, whereas for scalable
@@ -283,13 +282,6 @@ struct VPTransformState {
283282
Iter->second[CacheIdx] = V;
284283
}
285284

286-
/// Add additional metadata to \p To that was not present on \p Orig.
287-
///
288-
/// Currently this is used to add the noalias annotations based on the
289-
/// inserted memchecks. Use this for instructions that are *cloned* into the
290-
/// vector loop.
291-
void addNewMetadata(Value *To, const Instruction *Orig);
292-
293285
/// Set the debug location in the builder using the debug location \p DL.
294286
void setDebugLocFrom(DebugLoc DL);
295287

@@ -341,13 +333,6 @@ struct VPTransformState {
341333
/// The parent loop object for the current scope, or nullptr.
342334
Loop *CurrentParentLoop = nullptr;
343335

344-
/// LoopVersioning. It's only set up (non-null) if memchecks were
345-
/// used.
346-
///
347-
/// This is currently only used to add no-alias metadata based on the
348-
/// memchecks. The actually versioning is performed manually.
349-
LoopVersioning *LVer = nullptr;
350-
351336
/// VPlan-based type analysis.
352337
VPTypeAnalysis TypeAnalysis;
353338

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2729,8 +2729,6 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
27292729
}
27302730

27312731
void VPWidenLoadRecipe::execute(VPTransformState &State) {
2732-
auto *LI = cast<LoadInst>(&Ingredient);
2733-
27342732
Type *ScalarDataTy = getLoadStoreType(&Ingredient);
27352733
auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
27362734
const Align Alignment = getLoadStoreAlignment(&Ingredient);
@@ -2759,7 +2757,6 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
27592757
NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
27602758
}
27612759
// Add metadata to the load, but setVectorValue to the reverse shuffle.
2762-
State.addNewMetadata(NewLI, LI);
27632760
applyMetadata(NewLI);
27642761
if (Reverse)
27652762
NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
@@ -2788,8 +2785,6 @@ static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
27882785
}
27892786

27902787
void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
2791-
auto *LI = cast<LoadInst>(&Ingredient);
2792-
27932788
Type *ScalarDataTy = getLoadStoreType(&Ingredient);
27942789
auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
27952790
const Align Alignment = getLoadStoreAlignment(&Ingredient);
@@ -2820,7 +2815,6 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
28202815
}
28212816
NewLI->addParamAttr(
28222817
0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
2823-
State.addNewMetadata(NewLI, LI);
28242818
applyMetadata(NewLI);
28252819
Instruction *Res = NewLI;
28262820
if (isReverse())
@@ -2864,8 +2858,6 @@ void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
28642858
#endif
28652859

28662860
void VPWidenStoreRecipe::execute(VPTransformState &State) {
2867-
auto *SI = cast<StoreInst>(&Ingredient);
2868-
28692861
VPValue *StoredVPValue = getStoredValue();
28702862
bool CreateScatter = !isConsecutive();
28712863
const Align Alignment = getLoadStoreAlignment(&Ingredient);
@@ -2897,7 +2889,6 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
28972889
NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
28982890
else
28992891
NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
2900-
State.addNewMetadata(NewSI, SI);
29012892
applyMetadata(NewSI);
29022893
}
29032894

@@ -2910,8 +2901,6 @@ void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
29102901
#endif
29112902

29122903
void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
2913-
auto *SI = cast<StoreInst>(&Ingredient);
2914-
29152904
VPValue *StoredValue = getStoredValue();
29162905
bool CreateScatter = !isConsecutive();
29172906
const Align Alignment = getLoadStoreAlignment(&Ingredient);
@@ -2945,7 +2934,6 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
29452934
}
29462935
NewSI->addParamAttr(
29472936
1, Attribute::getWithAlignment(NewSI->getContext(), Alignment));
2948-
State.addNewMetadata(NewSI, SI);
29492937
applyMetadata(NewSI);
29502938
}
29512939

0 commit comments

Comments
 (0)