Skip to content

Commit 8151696

Browse files
committed
[VPlan] Manage noalias/alias_scope metadata in VPlan.
Use VPIRMetadata added in llvm#135272 to also manage no-alias metadata added by versioning. Note that this means we have to build the no-alias metadata up-front once. If it is not used, it will be discarded automatically.
1 parent e17122f commit 8151696

File tree

10 files changed

+109
-103
lines changed

10 files changed

+109
-103
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class LoopVectorizationLegality;
3636
class LoopVectorizationCostModel;
3737
class PredicatedScalarEvolution;
3838
class LoopVectorizeHints;
39+
class LoopVersioning;
3940
class OptimizationRemarkEmitter;
4041
class TargetTransformInfo;
4142
class TargetLibraryInfo;
@@ -518,7 +519,7 @@ class LoopVectorizationPlanner {
518519
/// returned VPlan is valid for. If no VPlan can be built for the input range,
519520
/// set the largest included VF to the maximum VF for which no plan could be
520521
/// built.
521-
VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range);
522+
VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range, LoopVersioning *LVer);
522523

523524
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
524525
/// according to the information gathered by Legal when it checked if it is

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 41 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2357,7 +2357,7 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
23572357
InputLane = VPLane::getFirstLane();
23582358
Cloned->setOperand(I.index(), State.get(Operand, InputLane));
23592359
}
2360-
State.addNewMetadata(Cloned, Instr);
2360+
RepRecipe->applyMetadata(*Cloned);
23612361

23622362
// Place the cloned scalar in the new loop.
23632363
State.Builder.Insert(Cloned);
@@ -7902,24 +7902,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
79027902
if (VectorizingEpilogue)
79037903
VPlanTransforms::removeDeadRecipes(BestVPlan);
79047904

7905-
// Only use noalias metadata when using memory checks guaranteeing no overlap
7906-
// across all iterations.
7907-
const LoopAccessInfo *LAI = Legal->getLAI();
7908-
std::unique_ptr<LoopVersioning> LVer = nullptr;
7909-
if (LAI && !LAI->getRuntimePointerChecking()->getChecks().empty() &&
7910-
!LAI->getRuntimePointerChecking()->getDiffChecks()) {
7911-
7912-
// We currently don't use LoopVersioning for the actual loop cloning but we
7913-
// still use it to add the noalias metadata.
7914-
// TODO: Find a better way to re-use LoopVersioning functionality to add
7915-
// metadata.
7916-
LVer = std::make_unique<LoopVersioning>(
7917-
*LAI, LAI->getRuntimePointerChecking()->getChecks(), OrigLoop, LI, DT,
7918-
PSE.getSE());
7919-
State.LVer = &*LVer;
7920-
State.LVer->prepareNoAliasMetadata();
7921-
}
7922-
79237905
ILV.printDebugTracesAtStart();
79247906

79257907
//===------------------------------------------------===//
@@ -8510,13 +8492,14 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
85108492
Builder.insert(VectorPtr);
85118493
Ptr = VectorPtr;
85128494
}
8495+
auto Metadata = getMetadataToPropagate(I);
85138496
if (LoadInst *Load = dyn_cast<LoadInst>(I))
85148497
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
8515-
I->getDebugLoc());
8498+
Metadata, I->getDebugLoc());
85168499

85178500
StoreInst *Store = cast<StoreInst>(I);
85188501
return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
8519-
Reverse, I->getDebugLoc());
8502+
Reverse, Metadata, I->getDebugLoc());
85208503
}
85218504

85228505
/// Creates a VPWidenIntOrFpInductionRecipe for \p Phi. If needed, it will also
@@ -8891,8 +8874,9 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
88918874
assert((Range.Start.isScalar() || !IsUniform || !IsPredicated ||
88928875
(Range.Start.isScalable() && isa<IntrinsicInst>(I))) &&
88938876
"Should not predicate a uniform recipe");
8894-
auto *Recipe = new VPReplicateRecipe(
8895-
I, make_range(Operands.begin(), Operands.end()), IsUniform, BlockInMask);
8877+
auto *Recipe =
8878+
new VPReplicateRecipe(I, make_range(Operands.begin(), Operands.end()),
8879+
IsUniform, BlockInMask, getMetadataToPropagate(I));
88968880
return Recipe;
88978881
}
88988882

@@ -9013,6 +8997,20 @@ bool VPRecipeBuilder::getScaledReductions(
90138997
return false;
90148998
}
90158999

9000+
SmallVector<std::pair<unsigned, MDNode *>>
9001+
VPRecipeBuilder::getMetadataToPropagate(Instruction *I) const {
9002+
SmallVector<std::pair<unsigned, MDNode *>> Metadata;
9003+
::getMetadataToPropagate(I, Metadata);
9004+
if (LVer && isa<LoadInst, StoreInst>(I)) {
9005+
const auto &[AliasScopeMD, NoAliasMD] = LVer->getNoAliasMetadataFor(I);
9006+
if (AliasScopeMD)
9007+
Metadata.emplace_back(LLVMContext::MD_alias_scope, AliasScopeMD);
9008+
if (NoAliasMD)
9009+
Metadata.emplace_back(LLVMContext::MD_noalias, NoAliasMD);
9010+
}
9011+
return Metadata;
9012+
}
9013+
90169014
VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
90179015
Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range) {
90189016
// First, check for specific widening recipes that deal with inductions, Phi
@@ -9140,10 +9138,22 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
91409138
ElementCount MaxVF) {
91419139
assert(OrigLoop->isInnermost() && "Inner loop expected.");
91429140

9141+
// Only use noalias metadata when using memory checks guaranteeing no overlap
9142+
// across all iterations.
9143+
const LoopAccessInfo *LAI = Legal->getLAI();
9144+
std::unique_ptr<LoopVersioning> LVer = nullptr;
9145+
if (LAI && !LAI->getRuntimePointerChecking()->getChecks().empty() &&
9146+
!LAI->getRuntimePointerChecking()->getDiffChecks()) {
9147+
LVer = std::make_unique<LoopVersioning>(
9148+
*LAI, LAI->getRuntimePointerChecking()->getChecks(), OrigLoop, LI, DT,
9149+
PSE.getSE());
9150+
LVer->prepareNoAliasMetadata();
9151+
}
9152+
91439153
auto MaxVFTimes2 = MaxVF * 2;
91449154
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
91459155
VFRange SubRange = {VF, MaxVFTimes2};
9146-
if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange)) {
9156+
if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, LVer.get())) {
91479157
bool HasScalarVF = Plan->hasScalarVFOnly();
91489158
// Now optimize the initial VPlan.
91499159
if (!HasScalarVF)
@@ -9408,7 +9418,8 @@ static void addExitUsersForFirstOrderRecurrences(
94089418
}
94099419

94109420
VPlanPtr
9411-
LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9421+
LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
9422+
LoopVersioning *LVer) {
94129423

94139424
using namespace llvm::VPlanPatternMatch;
94149425
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
@@ -9464,7 +9475,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
94649475
}
94659476

94669477
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
9467-
Builder);
9478+
Builder, LVer);
94689479

94699480
// ---------------------------------------------------------------------------
94709481
// Pre-construction: record ingredients whose recipes we'll need to further
@@ -9570,8 +9581,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
95709581
Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) {
95719582
// Only create recipe for the final invariant store of the reduction.
95729583
if (Legal->isInvariantStoreOfReduction(SI)) {
9573-
auto *Recipe =
9574-
new VPReplicateRecipe(SI, R.operands(), true /* IsUniform */);
9584+
auto *Recipe = new VPReplicateRecipe(
9585+
SI, R.operands(), true /* IsUniform */, /*Mask*/ nullptr,
9586+
RecipeBuilder.getMetadataToPropagate(SI));
95759587
Recipe->insertBefore(*MiddleVPBB, MBIP);
95769588
}
95779589
R.eraseFromParent();
@@ -9753,7 +9765,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
97539765
// Collect mapping of IR header phis to header phi recipes, to be used in
97549766
// addScalarResumePhis.
97559767
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
9756-
Builder);
9768+
Builder, nullptr);
97579769
for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
97589770
if (isa<VPCanonicalIVPHIRecipe>(&R))
97599771
continue;

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ class VPRecipeBuilder {
9090
/// A mapping of partial reduction exit instructions to their scaling factor.
9191
DenseMap<const Instruction *, unsigned> ScaledReductionMap;
9292

93+
/// Loop versioning instance for getting noalias metadata guaranteed by
94+
/// runtime checks.
95+
LoopVersioning *LVer;
96+
9397
/// Check if \p I can be widened at the start of \p Range and possibly
9498
/// decrease the range such that the returned value holds for the entire \p
9599
/// Range. The function should not be called for memory instructions or calls.
@@ -155,9 +159,10 @@ class VPRecipeBuilder {
155159
const TargetTransformInfo *TTI,
156160
LoopVectorizationLegality *Legal,
157161
LoopVectorizationCostModel &CM,
158-
PredicatedScalarEvolution &PSE, VPBuilder &Builder)
162+
PredicatedScalarEvolution &PSE, VPBuilder &Builder,
163+
LoopVersioning *LVer)
159164
: Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
160-
CM(CM), PSE(PSE), Builder(Builder) {}
165+
CM(CM), PSE(PSE), Builder(Builder), LVer(LVer) {}
161166

162167
std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
163168
auto It = ScaledReductionMap.find(ExitInst);
@@ -233,6 +238,11 @@ class VPRecipeBuilder {
233238
}
234239
return Plan.getOrAddLiveIn(V);
235240
}
241+
242+
/// Returns the metadata that can be preserved from the original instruction
243+
/// \p I, including noalias metadata guaranteed by runtime checks.
244+
SmallVector<std::pair<unsigned, MDNode *>>
245+
getMetadataToPropagate(Instruction *I) const;
236246
};
237247
} // end namespace llvm
238248

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,8 @@ VPTransformState::VPTransformState(const TargetTransformInfo *TTI,
220220
InnerLoopVectorizer *ILV, VPlan *Plan,
221221
Loop *CurrentParentLoop, Type *CanonicalIVTy)
222222
: TTI(TTI), VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
223-
CurrentParentLoop(CurrentParentLoop), LVer(nullptr),
224-
TypeAnalysis(CanonicalIVTy), VPDT(*Plan) {}
223+
CurrentParentLoop(CurrentParentLoop), TypeAnalysis(CanonicalIVTy),
224+
VPDT(*Plan) {}
225225

226226
Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) {
227227
if (Def->isLiveIn())
@@ -355,14 +355,6 @@ BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
355355
return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
356356
}
357357

358-
void VPTransformState::addNewMetadata(Instruction *To,
359-
const Instruction *Orig) {
360-
// If the loop was versioned with memchecks, add the corresponding no-alias
361-
// metadata.
362-
if (LVer && isa<LoadInst, StoreInst>(Orig))
363-
LVer->annotateInstWithNoAlias(To, Orig);
364-
}
365-
366358
void VPTransformState::setDebugLocFrom(DebugLoc DL) {
367359
const DILocation *DIL = DL;
368360
// When a FSDiscriminator is enabled, we don't need to add the multiply

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1208,6 +1208,8 @@ struct VPIRPhi : public VPIRInstruction {
12081208
#endif
12091209
};
12101210

1211+
using MDArrayRef = ArrayRef<std::pair<unsigned, MDNode *>>;
1212+
12111213
/// Helper to manage IR metadata for recipes. It filters out metadata that
12121214
/// cannot be propagated.
12131215
class VPIRMetadata {
@@ -1216,10 +1218,14 @@ class VPIRMetadata {
12161218
protected:
12171219
VPIRMetadata() {}
12181220
VPIRMetadata(Instruction &I) { getMetadataToPropagate(&I, Metadata); }
1221+
VPIRMetadata(MDArrayRef Metadata) : Metadata(Metadata) {}
12191222

12201223
public:
12211224
/// Add all metadata to \p I.
12221225
void applyMetadata(Instruction &I) const;
1226+
1227+
/// Return the IR metadata.
1228+
MDArrayRef getMetadata() const { return Metadata; }
12231229
};
12241230

12251231
/// VPWidenRecipe is a recipe for producing a widened instruction using the
@@ -2483,7 +2489,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
24832489
/// copies of the original scalar type, one per lane, instead of producing a
24842490
/// single copy of widened type for all lanes. If the instruction is known to be
24852491
/// uniform only one copy, per lane zero, will be generated.
2486-
class VPReplicateRecipe : public VPRecipeWithIRFlags {
2492+
class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
24872493
/// Indicator if only a single replica per lane is needed.
24882494
bool IsUniform;
24892495

@@ -2493,19 +2499,20 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags {
24932499
public:
24942500
template <typename IterT>
24952501
VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
2496-
bool IsUniform, VPValue *Mask = nullptr)
2502+
bool IsUniform, VPValue *Mask = nullptr,
2503+
ArrayRef<std::pair<unsigned, MDNode *>> Metadata = {})
24972504
: VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2498-
IsUniform(IsUniform), IsPredicated(Mask) {
2505+
VPIRMetadata(Metadata), IsUniform(IsUniform), IsPredicated(Mask) {
24992506
if (Mask)
25002507
addOperand(Mask);
25012508
}
25022509

25032510
~VPReplicateRecipe() override = default;
25042511

25052512
VPReplicateRecipe *clone() override {
2506-
auto *Copy =
2507-
new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
2508-
isPredicated() ? getMask() : nullptr);
2513+
auto *Copy = new VPReplicateRecipe(
2514+
getUnderlyingInstr(), operands(), IsUniform,
2515+
isPredicated() ? getMask() : nullptr, getMetadata());
25092516
Copy->transferFlags(*this);
25102517
return Copy;
25112518
}
@@ -2665,8 +2672,9 @@ class VPWidenMemoryRecipe : public VPRecipeBase, public VPIRMetadata {
26652672

26662673
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
26672674
std::initializer_list<VPValue *> Operands,
2668-
bool Consecutive, bool Reverse, DebugLoc DL)
2669-
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(I), Ingredient(I),
2675+
bool Consecutive, bool Reverse, MDArrayRef Metadata,
2676+
DebugLoc DL)
2677+
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
26702678
Consecutive(Consecutive), Reverse(Reverse) {
26712679
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
26722680
}
@@ -2724,16 +2732,17 @@ class VPWidenMemoryRecipe : public VPRecipeBase, public VPIRMetadata {
27242732
/// optional mask.
27252733
struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
27262734
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
2727-
bool Consecutive, bool Reverse, DebugLoc DL)
2735+
bool Consecutive, bool Reverse, MDArrayRef Metadata,
2736+
DebugLoc DL)
27282737
: VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
2729-
Reverse, DL),
2738+
Reverse, Metadata, DL),
27302739
VPValue(this, &Load) {
27312740
setMask(Mask);
27322741
}
27332742

27342743
VPWidenLoadRecipe *clone() override {
27352744
return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
2736-
getMask(), Consecutive, Reverse,
2745+
getMask(), Consecutive, Reverse, getMetadata(),
27372746
getDebugLoc());
27382747
}
27392748

@@ -2765,7 +2774,7 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
27652774
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
27662775
: VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
27672776
{L.getAddr(), &EVL}, L.isConsecutive(),
2768-
L.isReverse(), L.getDebugLoc()),
2777+
L.isReverse(), L.getMetadata(), L.getDebugLoc()),
27692778
VPValue(this, &getIngredient()) {
27702779
setMask(Mask);
27712780
}
@@ -2802,16 +2811,17 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
28022811
/// to store to and an optional mask.
28032812
struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
28042813
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
2805-
VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
2814+
VPValue *Mask, bool Consecutive, bool Reverse,
2815+
MDArrayRef Metadata, DebugLoc DL)
28062816
: VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
2807-
Consecutive, Reverse, DL) {
2817+
Consecutive, Reverse, Metadata, DL) {
28082818
setMask(Mask);
28092819
}
28102820

28112821
VPWidenStoreRecipe *clone() override {
28122822
return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
28132823
getStoredValue(), getMask(), Consecutive,
2814-
Reverse, getDebugLoc());
2824+
Reverse, getMetadata(), getDebugLoc());
28152825
}
28162826

28172827
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
@@ -2845,7 +2855,8 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
28452855
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
28462856
: VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
28472857
{S.getAddr(), S.getStoredValue(), &EVL},
2848-
S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
2858+
S.isConsecutive(), S.isReverse(), S.getMetadata(),
2859+
S.getDebugLoc()) {
28492860
setMask(Mask);
28502861
}
28512862

llvm/lib/Transforms/Vectorize/VPlanHelpers.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ class VPBasicBlock;
3838
class VPRegionBlock;
3939
class VPlan;
4040
class Value;
41-
class LoopVersioning;
4241

4342
/// Returns a calculation for the total number of elements for a given \p VF.
4443
/// For fixed width vectors this value is a constant, whereas for scalable
@@ -283,13 +282,6 @@ struct VPTransformState {
283282
Iter->second[CacheIdx] = V;
284283
}
285284

286-
/// Add additional metadata to \p To that was not present on \p Orig.
287-
///
288-
/// Currently this is used to add the noalias annotations based on the
289-
/// inserted memchecks. Use this for instructions that are *cloned* into the
290-
/// vector loop.
291-
void addNewMetadata(Instruction *To, const Instruction *Orig);
292-
293285
/// Set the debug location in the builder using the debug location \p DL.
294286
void setDebugLocFrom(DebugLoc DL);
295287

@@ -341,13 +333,6 @@ struct VPTransformState {
341333
/// The parent loop object for the current scope, or nullptr.
342334
Loop *CurrentParentLoop = nullptr;
343335

344-
/// LoopVersioning. It's only set up (non-null) if memchecks were
345-
/// used.
346-
///
347-
/// This is currently only used to add no-alias metadata based on the
348-
/// memchecks. The actual versioning is performed manually.
349-
LoopVersioning *LVer = nullptr;
350-
351336
/// VPlan-based type analysis.
352337
VPTypeAnalysis TypeAnalysis;
353338

0 commit comments

Comments
 (0)