Skip to content

Commit 755bb7a

Browse files
committed
[VPlan] Manage noalias/alias_scope metadata in VPlan.
Use VPIRMetadata added in llvm#135272 to also manage no-alias metadata added by versioning. Note that this means we have to build the no-alias metadata up-front once. If it is not used, it will be discarded automatically. !fixup move applyMetadata; !fixup address comments, thanks !fixup address remaining comments, thanks !fixup address latest comments, thanks
1 parent 127f486 commit 755bb7a

File tree

10 files changed

+100
-100
lines changed

10 files changed

+100
-100
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class LoopVectorizationLegality;
3636
class LoopVectorizationCostModel;
3737
class PredicatedScalarEvolution;
3838
class LoopVectorizeHints;
39+
class LoopVersioning;
3940
class OptimizationRemarkEmitter;
4041
class TargetTransformInfo;
4142
class TargetLibraryInfo;
@@ -524,7 +525,7 @@ class LoopVectorizationPlanner {
524525
/// returned VPlan is valid for. If no VPlan can be built for the input range,
525526
/// set the largest included VF to the maximum VF for which no plan could be
526527
/// built.
527-
VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range);
528+
VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range, LoopVersioning *LVer);
528529

529530
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
530531
/// according to the information gathered by Legal when it checked if it is

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 36 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -7837,24 +7837,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
78377837
if (VectorizingEpilogue)
78387838
VPlanTransforms::removeDeadRecipes(BestVPlan);
78397839

7840-
// Only use noalias metadata when using memory checks guaranteeing no overlap
7841-
// across all iterations.
7842-
const LoopAccessInfo *LAI = Legal->getLAI();
7843-
std::unique_ptr<LoopVersioning> LVer = nullptr;
7844-
if (LAI && !LAI->getRuntimePointerChecking()->getChecks().empty() &&
7845-
!LAI->getRuntimePointerChecking()->getDiffChecks()) {
7846-
7847-
// We currently don't use LoopVersioning for the actual loop cloning but we
7848-
// still use it to add the noalias metadata.
7849-
// TODO: Find a better way to re-use LoopVersioning functionality to add
7850-
// metadata.
7851-
LVer = std::make_unique<LoopVersioning>(
7852-
*LAI, LAI->getRuntimePointerChecking()->getChecks(), OrigLoop, LI, DT,
7853-
PSE.getSE());
7854-
State.LVer = &*LVer;
7855-
State.LVer->prepareNoAliasMetadata();
7856-
}
7857-
78587840
ILV.printDebugTracesAtStart();
78597841

78607842
//===------------------------------------------------===//
@@ -8465,13 +8447,14 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
84658447
Builder.insert(VectorPtr);
84668448
Ptr = VectorPtr;
84678449
}
8450+
auto Metadata = getRecipeMetadata(I);
84688451
if (LoadInst *Load = dyn_cast<LoadInst>(I))
84698452
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
8470-
I->getDebugLoc());
8453+
Metadata, I->getDebugLoc());
84718454

84728455
StoreInst *Store = cast<StoreInst>(I);
84738456
return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
8474-
Reverse, I->getDebugLoc());
8457+
Reverse, Metadata, I->getDebugLoc());
84758458
}
84768459

84778460
/// Creates a VPWidenIntOrFpInductionRecipe for \p Phi. If needed, it will also
@@ -8844,7 +8827,8 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
88448827
assert((Range.Start.isScalar() || !IsUniform || !IsPredicated ||
88458828
(Range.Start.isScalable() && isa<IntrinsicInst>(I))) &&
88468829
"Should not predicate a uniform recipe");
8847-
auto *Recipe = new VPReplicateRecipe(I, Operands, IsUniform, BlockInMask);
8830+
auto *Recipe = new VPReplicateRecipe(I, Operands, IsUniform, BlockInMask,
8831+
getRecipeMetadata(I));
88488832
return Recipe;
88498833
}
88508834

@@ -8965,6 +8949,20 @@ bool VPRecipeBuilder::getScaledReductions(
89658949
return false;
89668950
}
89678951

8952+
VPIRMetadata VPRecipeBuilder::getRecipeMetadata(Instruction *I) const {
8953+
SmallVector<std::pair<unsigned, MDNode *>> Metadata;
8954+
::getMetadataToPropagate(I, Metadata);
8955+
if (!LVer || !isa<LoadInst, StoreInst>(I))
8956+
return {Metadata};
8957+
8958+
const auto &[AliasScopeMD, NoAliasMD] = LVer->getNoAliasMetadataFor(I);
8959+
if (AliasScopeMD)
8960+
Metadata.emplace_back(LLVMContext::MD_alias_scope, AliasScopeMD);
8961+
if (NoAliasMD)
8962+
Metadata.emplace_back(LLVMContext::MD_noalias, NoAliasMD);
8963+
return {Metadata};
8964+
}
8965+
89688966
VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
89698967
Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range) {
89708968
// First, check for specific widening recipes that deal with inductions, Phi
@@ -9091,10 +9089,20 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
90919089
ElementCount MaxVF) {
90929090
assert(OrigLoop->isInnermost() && "Inner loop expected.");
90939091

9092+
const LoopAccessInfo *LAI = Legal->getLAI();
9093+
LoopVersioning LVer(*LAI, LAI->getRuntimePointerChecking()->getChecks(),
9094+
OrigLoop, LI, DT, PSE.getSE());
9095+
if (!LAI->getRuntimePointerChecking()->getChecks().empty() &&
9096+
!LAI->getRuntimePointerChecking()->getDiffChecks()) {
9097+
// Only use noalias metadata when using memory checks guaranteeing no
9098+
// overlap across all iterations.
9099+
LVer.prepareNoAliasMetadata();
9100+
}
9101+
90949102
auto MaxVFTimes2 = MaxVF * 2;
90959103
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
90969104
VFRange SubRange = {VF, MaxVFTimes2};
9097-
if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange)) {
9105+
if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, &LVer)) {
90989106
bool HasScalarVF = Plan->hasScalarVFOnly();
90999107
// Now optimize the initial VPlan.
91009108
if (!HasScalarVF)
@@ -9356,7 +9364,8 @@ static void addExitUsersForFirstOrderRecurrences(
93569364
}
93579365

93589366
VPlanPtr
9359-
LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9367+
LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
9368+
LoopVersioning *LVer) {
93609369

93619370
using namespace llvm::VPlanPatternMatch;
93629371
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
@@ -9412,7 +9421,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
94129421
}
94139422

94149423
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
9415-
Builder);
9424+
Builder, LVer);
94169425

94179426
// ---------------------------------------------------------------------------
94189427
// Pre-construction: record ingredients whose recipes we'll need to further
@@ -9518,8 +9527,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
95189527
Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) {
95199528
// Only create recipe for the final invariant store of the reduction.
95209529
if (Legal->isInvariantStoreOfReduction(SI)) {
9521-
auto *Recipe =
9522-
new VPReplicateRecipe(SI, R.operands(), true /* IsUniform */);
9530+
auto *Recipe = new VPReplicateRecipe(
9531+
SI, R.operands(), true /* IsUniform */, nullptr /*Mask*/,
9532+
RecipeBuilder.getRecipeMetadata(SI));
95239533
Recipe->insertBefore(*MiddleVPBB, MBIP);
95249534
}
95259535
R.eraseFromParent();

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ class VPRecipeBuilder {
9090
/// A mapping of partial reduction exit instructions to their scaling factor.
9191
DenseMap<const Instruction *, unsigned> ScaledReductionMap;
9292

93+
/// Loop versioning instance for getting noalias metadata guaranteed by
94+
/// runtime checks.
95+
LoopVersioning *LVer;
96+
9397
/// Check if \p I can be widened at the start of \p Range and possibly
9498
/// decrease the range such that the returned value holds for the entire \p
9599
/// Range. The function should not be called for memory instructions or calls.
@@ -155,9 +159,10 @@ class VPRecipeBuilder {
155159
const TargetTransformInfo *TTI,
156160
LoopVectorizationLegality *Legal,
157161
LoopVectorizationCostModel &CM,
158-
PredicatedScalarEvolution &PSE, VPBuilder &Builder)
162+
PredicatedScalarEvolution &PSE, VPBuilder &Builder,
163+
LoopVersioning *LVer = nullptr)
159164
: Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
160-
CM(CM), PSE(PSE), Builder(Builder) {}
165+
CM(CM), PSE(PSE), Builder(Builder), LVer(LVer) {}
161166

162167
std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
163168
auto It = ScaledReductionMap.find(ExitInst);
@@ -233,6 +238,10 @@ class VPRecipeBuilder {
233238
}
234239
return Plan.getOrAddLiveIn(V);
235240
}
241+
242+
/// Returns the metadata that can be preserved from the original instruction
243+
/// \p I, including noalias metadata guaranteed by runtime checks.
244+
VPIRMetadata getRecipeMetadata(Instruction *I) const;
236245
};
237246
} // end namespace llvm
238247

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -219,9 +219,9 @@ VPTransformState::VPTransformState(const TargetTransformInfo *TTI,
219219
DominatorTree *DT, AssumptionCache *AC,
220220
IRBuilderBase &Builder, VPlan *Plan,
221221
Loop *CurrentParentLoop, Type *CanonicalIVTy)
222-
: TTI(TTI), VF(VF), CFG(DT), LI(LI), AC(AC), Builder(Builder), Plan(Plan),
223-
CurrentParentLoop(CurrentParentLoop), LVer(nullptr),
224-
TypeAnalysis(CanonicalIVTy), VPDT(*Plan) {}
222+
: TTI(TTI), VF(VF), CFG(DT), LI(LI), AC(AC), Builder(Builder), Plan(Plan),
223+
CurrentParentLoop(CurrentParentLoop), TypeAnalysis(CanonicalIVTy),
224+
VPDT(*Plan) {}
225225

226226
Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) {
227227
if (Def->isLiveIn())
@@ -350,14 +350,6 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
350350
return VectorValue;
351351
}
352352

353-
void VPTransformState::addNewMetadata(Instruction *To,
354-
const Instruction *Orig) {
355-
// If the loop was versioned with memchecks, add the corresponding no-alias
356-
// metadata.
357-
if (LVer && isa<LoadInst, StoreInst>(Orig))
358-
LVer->annotateInstWithNoAlias(To, Orig);
359-
}
360-
361353
void VPTransformState::setDebugLocFrom(DebugLoc DL) {
362354
const DILocation *DIL = DL;
363355
// When a FSDiscriminator is enabled, we don't need to add the multiply

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1222,10 +1222,14 @@ class VPIRMetadata {
12221222
SmallVector<std::pair<unsigned, MDNode *>> Metadata;
12231223

12241224
protected:
1225-
VPIRMetadata() {}
12261225
VPIRMetadata(Instruction &I) { getMetadataToPropagate(&I, Metadata); }
12271226

12281227
public:
1228+
VPIRMetadata() {}
1229+
VPIRMetadata(ArrayRef<std::pair<unsigned, MDNode *>> Metadata)
1230+
: Metadata(Metadata) {}
1231+
VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {}
1232+
12291233
/// Add all metadata to \p I.
12301234
void applyMetadata(Instruction &I) const;
12311235
};
@@ -2486,7 +2490,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
24862490
/// copies of the original scalar type, one per lane, instead of producing a
24872491
/// single copy of widened type for all lanes. If the instruction is known to be
24882492
/// uniform only one copy, per lane zero, will be generated.
2489-
class VPReplicateRecipe : public VPRecipeWithIRFlags {
2493+
class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
24902494
/// Indicator if only a single replica per lane is needed.
24912495
bool IsUniform;
24922496

@@ -2495,9 +2499,10 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags {
24952499

24962500
public:
24972501
VPReplicateRecipe(Instruction *I, ArrayRef<VPValue *> Operands,
2498-
bool IsUniform, VPValue *Mask = nullptr)
2502+
bool IsUniform, VPValue *Mask = nullptr,
2503+
VPIRMetadata Metadata = {})
24992504
: VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2500-
IsUniform(IsUniform), IsPredicated(Mask) {
2505+
VPIRMetadata(Metadata), IsUniform(IsUniform), IsPredicated(Mask) {
25012506
if (Mask)
25022507
addOperand(Mask);
25032508
}
@@ -2507,7 +2512,7 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags {
25072512
VPReplicateRecipe *clone() override {
25082513
auto *Copy =
25092514
new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
2510-
isPredicated() ? getMask() : nullptr);
2515+
isPredicated() ? getMask() : nullptr, *this);
25112516
Copy->transferFlags(*this);
25122517
return Copy;
25132518
}
@@ -2667,8 +2672,9 @@ class VPWidenMemoryRecipe : public VPRecipeBase, public VPIRMetadata {
26672672

26682673
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
26692674
std::initializer_list<VPValue *> Operands,
2670-
bool Consecutive, bool Reverse, DebugLoc DL)
2671-
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(I), Ingredient(I),
2675+
bool Consecutive, bool Reverse,
2676+
const VPIRMetadata &Metadata, DebugLoc DL)
2677+
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
26722678
Consecutive(Consecutive), Reverse(Reverse) {
26732679
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
26742680
}
@@ -2726,16 +2732,17 @@ class VPWidenMemoryRecipe : public VPRecipeBase, public VPIRMetadata {
27262732
/// optional mask.
27272733
struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
27282734
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
2729-
bool Consecutive, bool Reverse, DebugLoc DL)
2735+
bool Consecutive, bool Reverse,
2736+
const VPIRMetadata &Metadata, DebugLoc DL)
27302737
: VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
2731-
Reverse, DL),
2738+
Reverse, Metadata, DL),
27322739
VPValue(this, &Load) {
27332740
setMask(Mask);
27342741
}
27352742

27362743
VPWidenLoadRecipe *clone() override {
27372744
return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
2738-
getMask(), Consecutive, Reverse,
2745+
getMask(), Consecutive, Reverse, *this,
27392746
getDebugLoc());
27402747
}
27412748

@@ -2767,7 +2774,7 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
27672774
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
27682775
: VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
27692776
{L.getAddr(), &EVL}, L.isConsecutive(),
2770-
L.isReverse(), L.getDebugLoc()),
2777+
L.isReverse(), L, L.getDebugLoc()),
27712778
VPValue(this, &getIngredient()) {
27722779
setMask(Mask);
27732780
}
@@ -2804,16 +2811,17 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
28042811
/// to store to and an optional mask.
28052812
struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
28062813
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
2807-
VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
2814+
VPValue *Mask, bool Consecutive, bool Reverse,
2815+
const VPIRMetadata &Metadata, DebugLoc DL)
28082816
: VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
2809-
Consecutive, Reverse, DL) {
2817+
Consecutive, Reverse, Metadata, DL) {
28102818
setMask(Mask);
28112819
}
28122820

28132821
VPWidenStoreRecipe *clone() override {
28142822
return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
28152823
getStoredValue(), getMask(), Consecutive,
2816-
Reverse, getDebugLoc());
2824+
Reverse, *this, getDebugLoc());
28172825
}
28182826

28192827
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
@@ -2847,7 +2855,8 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
28472855
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
28482856
: VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
28492857
{S.getAddr(), S.getStoredValue(), &EVL},
2850-
S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
2858+
S.isConsecutive(), S.isReverse(), S,
2859+
S.getDebugLoc()) {
28512860
setMask(Mask);
28522861
}
28532862

llvm/lib/Transforms/Vectorize/VPlanHelpers.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ class VPBasicBlock;
3939
class VPRegionBlock;
4040
class VPlan;
4141
class Value;
42-
class LoopVersioning;
4342

4443
/// Returns a calculation for the total number of elements for a given \p VF.
4544
/// For fixed width vectors this value is a constant, whereas for scalable
@@ -284,13 +283,6 @@ struct VPTransformState {
284283
Iter->second[CacheIdx] = V;
285284
}
286285

287-
/// Add additional metadata to \p To that was not present on \p Orig.
288-
///
289-
/// Currently this is used to add the noalias annotations based on the
290-
/// inserted memchecks. Use this for instructions that are *cloned* into the
291-
/// vector loop.
292-
void addNewMetadata(Instruction *To, const Instruction *Orig);
293-
294286
/// Set the debug location in the builder using the debug location \p DL.
295287
void setDebugLocFrom(DebugLoc DL);
296288

@@ -339,13 +331,6 @@ struct VPTransformState {
339331
/// The parent loop object for the current scope, or nullptr.
340332
Loop *CurrentParentLoop = nullptr;
341333

342-
/// LoopVersioning. It's only set up (non-null) if memchecks were
343-
/// used.
344-
///
345-
/// This is currently only used to add no-alias metadata based on the
346-
/// memchecks. The actual versioning is performed manually.
347-
LoopVersioning *LVer = nullptr;
348-
349334
/// VPlan-based type analysis.
350335
VPTypeAnalysis TypeAnalysis;
351336

0 commit comments

Comments
 (0)