Skip to content

Commit 5b064b1

Browse files
committed
[VPlan] Manage noalias/alias_scope metadata in VPlan.
Use VPIRMetadata added in llvm#135272 to also manage no-alias metadata added by versioning. Note that this means we have to build the no-alias metadata up-front once. If it is not used, it will be discarded automatically. !fixup move applyMetadata; !fixup address comments, thanks !fixup address remaining comments, thanks !fixup address latest comments, thanks
1 parent 92d2e13 commit 5b064b1

File tree

10 files changed

+100
-100
lines changed

10 files changed

+100
-100
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class LoopVectorizationLegality;
3636
class LoopVectorizationCostModel;
3737
class PredicatedScalarEvolution;
3838
class LoopVectorizeHints;
39+
class LoopVersioning;
3940
class OptimizationRemarkEmitter;
4041
class TargetTransformInfo;
4142
class TargetLibraryInfo;
@@ -524,7 +525,7 @@ class LoopVectorizationPlanner {
524525
/// returned VPlan is valid for. If no VPlan can be built for the input range,
525526
/// set the largest included VF to the maximum VF for which no plan could be
526527
/// built.
527-
VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range);
528+
VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range, LoopVersioning *LVer);
528529

529530
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
530531
/// according to the information gathered by Legal when it checked if it is

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 36 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -7838,24 +7838,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
78387838
if (VectorizingEpilogue)
78397839
VPlanTransforms::removeDeadRecipes(BestVPlan);
78407840

7841-
// Only use noalias metadata when using memory checks guaranteeing no overlap
7842-
// across all iterations.
7843-
const LoopAccessInfo *LAI = Legal->getLAI();
7844-
std::unique_ptr<LoopVersioning> LVer = nullptr;
7845-
if (LAI && !LAI->getRuntimePointerChecking()->getChecks().empty() &&
7846-
!LAI->getRuntimePointerChecking()->getDiffChecks()) {
7847-
7848-
// We currently don't use LoopVersioning for the actual loop cloning but we
7849-
// still use it to add the noalias metadata.
7850-
// TODO: Find a better way to re-use LoopVersioning functionality to add
7851-
// metadata.
7852-
LVer = std::make_unique<LoopVersioning>(
7853-
*LAI, LAI->getRuntimePointerChecking()->getChecks(), OrigLoop, LI, DT,
7854-
PSE.getSE());
7855-
State.LVer = &*LVer;
7856-
State.LVer->prepareNoAliasMetadata();
7857-
}
7858-
78597841
ILV.printDebugTracesAtStart();
78607842

78617843
//===------------------------------------------------===//
@@ -8466,13 +8448,14 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
84668448
Builder.insert(VectorPtr);
84678449
Ptr = VectorPtr;
84688450
}
8451+
auto Metadata = getRecipeMetadata(I);
84698452
if (LoadInst *Load = dyn_cast<LoadInst>(I))
84708453
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
8471-
I->getDebugLoc());
8454+
Metadata, I->getDebugLoc());
84728455

84738456
StoreInst *Store = cast<StoreInst>(I);
84748457
return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
8475-
Reverse, I->getDebugLoc());
8458+
Reverse, Metadata, I->getDebugLoc());
84768459
}
84778460

84788461
/// Creates a VPWidenIntOrFpInductionRecipe for \p Phi. If needed, it will also
@@ -8845,7 +8828,8 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
88458828
assert((Range.Start.isScalar() || !IsUniform || !IsPredicated ||
88468829
(Range.Start.isScalable() && isa<IntrinsicInst>(I))) &&
88478830
"Should not predicate a uniform recipe");
8848-
auto *Recipe = new VPReplicateRecipe(I, Operands, IsUniform, BlockInMask);
8831+
auto *Recipe = new VPReplicateRecipe(I, Operands, IsUniform, BlockInMask,
8832+
getRecipeMetadata(I));
88498833
return Recipe;
88508834
}
88518835

@@ -8966,6 +8950,20 @@ bool VPRecipeBuilder::getScaledReductions(
89668950
return false;
89678951
}
89688952

8953+
/// Collect the metadata to attach to the recipe created for \p I: the entries
/// gathered by getMetadataToPropagate and, when LVer is set and \p I is a load
/// or store, the alias.scope/noalias nodes LVer provides for \p I.
VPIRMetadata VPRecipeBuilder::getRecipeMetadata(Instruction *I) const {
  SmallVector<std::pair<unsigned, MDNode *>> Metadata;
  ::getMetadataToPropagate(I, Metadata);
  // Without versioning info, or for non-memory instructions, there is no
  // no-alias metadata to add. The metadata collected above must still be
  // returned; returning an empty VPIRMetadata here would silently drop it.
  if (!LVer || !isa<LoadInst, StoreInst>(I))
    return {Metadata};

  const auto &[AliasScopeMD, NoAliasMD] = LVer->getNoAliasMetadataFor(I);
  if (AliasScopeMD)
    Metadata.emplace_back(LLVMContext::MD_alias_scope, AliasScopeMD);
  if (NoAliasMD)
    Metadata.emplace_back(LLVMContext::MD_noalias, NoAliasMD);
  return {Metadata};
}
8966+
89698967
VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
89708968
Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range) {
89718969
// First, check for specific widening recipes that deal with inductions, Phi
@@ -9092,10 +9090,20 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
90929090
ElementCount MaxVF) {
90939091
assert(OrigLoop->isInnermost() && "Inner loop expected.");
90949092

9093+
const LoopAccessInfo *LAI = Legal->getLAI();
9094+
LoopVersioning LVer(*LAI, LAI->getRuntimePointerChecking()->getChecks(),
9095+
OrigLoop, LI, DT, PSE.getSE());
9096+
if (!LAI->getRuntimePointerChecking()->getChecks().empty() &&
9097+
!LAI->getRuntimePointerChecking()->getDiffChecks()) {
9098+
// Only use noalias metadata when using memory checks guaranteeing no
9099+
// overlap across all iterations.
9100+
LVer.prepareNoAliasMetadata();
9101+
}
9102+
90959103
auto MaxVFTimes2 = MaxVF * 2;
90969104
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
90979105
VFRange SubRange = {VF, MaxVFTimes2};
9098-
if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange)) {
9106+
if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, &LVer)) {
90999107
bool HasScalarVF = Plan->hasScalarVFOnly();
91009108
// Now optimize the initial VPlan.
91019109
if (!HasScalarVF)
@@ -9357,7 +9365,8 @@ static void addExitUsersForFirstOrderRecurrences(
93579365
}
93589366

93599367
VPlanPtr
9360-
LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9368+
LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
9369+
LoopVersioning *LVer) {
93619370

93629371
using namespace llvm::VPlanPatternMatch;
93639372
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
@@ -9413,7 +9422,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
94139422
}
94149423

94159424
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
9416-
Builder);
9425+
Builder, LVer);
94179426

94189427
// ---------------------------------------------------------------------------
94199428
// Pre-construction: record ingredients whose recipes we'll need to further
@@ -9519,8 +9528,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
95199528
Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) {
95209529
// Only create recipe for the final invariant store of the reduction.
95219530
if (Legal->isInvariantStoreOfReduction(SI)) {
9522-
auto *Recipe =
9523-
new VPReplicateRecipe(SI, R.operands(), true /* IsUniform */);
9531+
auto *Recipe = new VPReplicateRecipe(
9532+
SI, R.operands(), true /* IsUniform */, nullptr /*Mask*/,
9533+
RecipeBuilder.getRecipeMetadata(SI));
95249534
Recipe->insertBefore(*MiddleVPBB, MBIP);
95259535
}
95269536
R.eraseFromParent();

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ class VPRecipeBuilder {
9090
/// A mapping of partial reduction exit instructions to their scaling factor.
9191
DenseMap<const Instruction *, unsigned> ScaledReductionMap;
9292

93+
/// Loop versioning instance for getting noalias metadata guaranteed by
94+
/// runtime checks.
95+
LoopVersioning *LVer;
96+
9397
/// Check if \p I can be widened at the start of \p Range and possibly
9498
/// decrease the range such that the returned value holds for the entire \p
9599
/// Range. The function should not be called for memory instructions or calls.
@@ -155,9 +159,10 @@ class VPRecipeBuilder {
155159
const TargetTransformInfo *TTI,
156160
LoopVectorizationLegality *Legal,
157161
LoopVectorizationCostModel &CM,
158-
PredicatedScalarEvolution &PSE, VPBuilder &Builder)
162+
PredicatedScalarEvolution &PSE, VPBuilder &Builder,
163+
LoopVersioning *LVer = nullptr)
159164
: Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
160-
CM(CM), PSE(PSE), Builder(Builder) {}
165+
CM(CM), PSE(PSE), Builder(Builder), LVer(LVer) {}
161166

162167
std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
163168
auto It = ScaledReductionMap.find(ExitInst);
@@ -233,6 +238,10 @@ class VPRecipeBuilder {
233238
}
234239
return Plan.getOrAddLiveIn(V);
235240
}
241+
242+
/// Returns the metadata that can be preserved from the original instruction
243+
/// \p I, including noalias metadata guaranteed by runtime checks.
244+
VPIRMetadata getRecipeMetadata(Instruction *I) const;
236245
};
237246
} // end namespace llvm
238247

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -219,9 +219,9 @@ VPTransformState::VPTransformState(const TargetTransformInfo *TTI,
219219
DominatorTree *DT, AssumptionCache *AC,
220220
IRBuilderBase &Builder, VPlan *Plan,
221221
Loop *CurrentParentLoop, Type *CanonicalIVTy)
222-
: TTI(TTI), VF(VF), CFG(DT), LI(LI), AC(AC), Builder(Builder), Plan(Plan),
223-
CurrentParentLoop(CurrentParentLoop), LVer(nullptr),
224-
TypeAnalysis(CanonicalIVTy), VPDT(*Plan) {}
222+
// Store the AssumptionCache passed in as AC. VPTransformState has no LVer
// member to initialize, and the visible signature has no ILV parameter.
: TTI(TTI), VF(VF), CFG(DT), LI(LI), AC(AC), Builder(Builder), Plan(Plan),
  CurrentParentLoop(CurrentParentLoop), TypeAnalysis(CanonicalIVTy),
  VPDT(*Plan) {}
225225

226226
Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) {
227227
if (Def->isLiveIn())
@@ -350,14 +350,6 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
350350
return VectorValue;
351351
}
352352

353-
void VPTransformState::addNewMetadata(Instruction *To,
354-
const Instruction *Orig) {
355-
// If the loop was versioned with memchecks, add the corresponding no-alias
356-
// metadata.
357-
if (LVer && isa<LoadInst, StoreInst>(Orig))
358-
LVer->annotateInstWithNoAlias(To, Orig);
359-
}
360-
361353
void VPTransformState::setDebugLocFrom(DebugLoc DL) {
362354
const DILocation *DIL = DL;
363355
// When a FSDiscriminator is enabled, we don't need to add the multiply

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1237,10 +1237,14 @@ class VPIRMetadata {
12371237
SmallVector<std::pair<unsigned, MDNode *>> Metadata;
12381238

12391239
protected:
1240-
VPIRMetadata() {}
12411240
VPIRMetadata(Instruction &I) { getMetadataToPropagate(&I, Metadata); }
12421241

12431242
public:
1243+
VPIRMetadata() {}
1244+
VPIRMetadata(ArrayRef<std::pair<unsigned, MDNode *>> Metadata)
1245+
: Metadata(Metadata) {}
1246+
VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {}
1247+
12441248
/// Add all metadata to \p I.
12451249
void applyMetadata(Instruction &I) const;
12461250
};
@@ -2511,7 +2515,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
25112515
/// copies of the original scalar type, one per lane, instead of producing a
25122516
/// single copy of widened type for all lanes. If the instruction is known to be
25132517
/// uniform only one copy, per lane zero, will be generated.
2514-
class VPReplicateRecipe : public VPRecipeWithIRFlags {
2518+
class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
25152519
/// Indicator if only a single replica per lane is needed.
25162520
bool IsUniform;
25172521

@@ -2520,9 +2524,10 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags {
25202524

25212525
public:
25222526
VPReplicateRecipe(Instruction *I, ArrayRef<VPValue *> Operands,
2523-
bool IsUniform, VPValue *Mask = nullptr)
2527+
bool IsUniform, VPValue *Mask = nullptr,
2528+
VPIRMetadata Metadata = {})
25242529
: VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2525-
IsUniform(IsUniform), IsPredicated(Mask) {
2530+
VPIRMetadata(Metadata), IsUniform(IsUniform), IsPredicated(Mask) {
25262531
if (Mask)
25272532
addOperand(Mask);
25282533
}
@@ -2532,7 +2537,7 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags {
25322537
VPReplicateRecipe *clone() override {
25332538
auto *Copy =
25342539
new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
2535-
isPredicated() ? getMask() : nullptr);
2540+
isPredicated() ? getMask() : nullptr, *this);
25362541
Copy->transferFlags(*this);
25372542
return Copy;
25382543
}
@@ -2692,8 +2697,9 @@ class VPWidenMemoryRecipe : public VPRecipeBase, public VPIRMetadata {
26922697

26932698
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
26942699
std::initializer_list<VPValue *> Operands,
2695-
bool Consecutive, bool Reverse, DebugLoc DL)
2696-
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(I), Ingredient(I),
2700+
bool Consecutive, bool Reverse,
2701+
const VPIRMetadata &Metadata, DebugLoc DL)
2702+
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
26972703
Consecutive(Consecutive), Reverse(Reverse) {
26982704
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
26992705
}
@@ -2751,16 +2757,17 @@ class VPWidenMemoryRecipe : public VPRecipeBase, public VPIRMetadata {
27512757
/// optional mask.
27522758
struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
27532759
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
2754-
bool Consecutive, bool Reverse, DebugLoc DL)
2760+
bool Consecutive, bool Reverse,
2761+
const VPIRMetadata &Metadata, DebugLoc DL)
27552762
: VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
2756-
Reverse, DL),
2763+
Reverse, Metadata, DL),
27572764
VPValue(this, &Load) {
27582765
setMask(Mask);
27592766
}
27602767

27612768
VPWidenLoadRecipe *clone() override {
27622769
return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
2763-
getMask(), Consecutive, Reverse,
2770+
getMask(), Consecutive, Reverse, *this,
27642771
getDebugLoc());
27652772
}
27662773

@@ -2792,7 +2799,7 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
27922799
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
27932800
: VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
27942801
{L.getAddr(), &EVL}, L.isConsecutive(),
2795-
L.isReverse(), L.getDebugLoc()),
2802+
L.isReverse(), L, L.getDebugLoc()),
27962803
VPValue(this, &getIngredient()) {
27972804
setMask(Mask);
27982805
}
@@ -2829,16 +2836,17 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
28292836
/// to store to and an optional mask.
28302837
struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
28312838
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
2832-
VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
2839+
VPValue *Mask, bool Consecutive, bool Reverse,
2840+
const VPIRMetadata &Metadata, DebugLoc DL)
28332841
: VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
2834-
Consecutive, Reverse, DL) {
2842+
Consecutive, Reverse, Metadata, DL) {
28352843
setMask(Mask);
28362844
}
28372845

28382846
VPWidenStoreRecipe *clone() override {
28392847
return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
28402848
getStoredValue(), getMask(), Consecutive,
2841-
Reverse, getDebugLoc());
2849+
Reverse, *this, getDebugLoc());
28422850
}
28432851

28442852
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
@@ -2872,7 +2880,8 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
28722880
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
28732881
: VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
28742882
{S.getAddr(), S.getStoredValue(), &EVL},
2875-
S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
2883+
S.isConsecutive(), S.isReverse(), S,
2884+
S.getDebugLoc()) {
28762885
setMask(Mask);
28772886
}
28782887

llvm/lib/Transforms/Vectorize/VPlanHelpers.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ class VPBasicBlock;
3939
class VPRegionBlock;
4040
class VPlan;
4141
class Value;
42-
class LoopVersioning;
4342

4443
/// Returns a calculation for the total number of elements for a given \p VF.
4544
/// For fixed width vectors this value is a constant, whereas for scalable
@@ -284,13 +283,6 @@ struct VPTransformState {
284283
Iter->second[CacheIdx] = V;
285284
}
286285

287-
/// Add additional metadata to \p To that was not present on \p Orig.
288-
///
289-
/// Currently this is used to add the noalias annotations based on the
290-
/// inserted memchecks. Use this for instructions that are *cloned* into the
291-
/// vector loop.
292-
void addNewMetadata(Instruction *To, const Instruction *Orig);
293-
294286
/// Set the debug location in the builder using the debug location \p DL.
295287
void setDebugLocFrom(DebugLoc DL);
296288

@@ -339,13 +331,6 @@ struct VPTransformState {
339331
/// The parent loop object for the current scope, or nullptr.
340332
Loop *CurrentParentLoop = nullptr;
341333

342-
/// LoopVersioning. It's only set up (non-null) if memchecks were
343-
/// used.
344-
///
345-
/// This is currently only used to add no-alias metadata based on the
346-
/// memchecks. The actually versioning is performed manually.
347-
LoopVersioning *LVer = nullptr;
348-
349334
/// VPlan-based type analysis.
350335
VPTypeAnalysis TypeAnalysis;
351336

0 commit comments

Comments
 (0)