Skip to content

[VPlan] Manage noalias/alias_scope metadata in VPlan. #136450

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class LoopVectorizationLegality;
class LoopVectorizationCostModel;
class PredicatedScalarEvolution;
class LoopVectorizeHints;
class LoopVersioning;
class OptimizationRemarkEmitter;
class TargetTransformInfo;
class TargetLibraryInfo;
Expand Down Expand Up @@ -524,7 +525,7 @@ class LoopVectorizationPlanner {
/// returned VPlan is valid for. If no VPlan can be built for the input range,
/// set the largest included VF to the maximum VF for which no plan could be
/// built.
VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range);
VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range, LoopVersioning *LVer);

/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
/// according to the information gathered by Legal when it checked if it is
Expand Down
48 changes: 22 additions & 26 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7838,24 +7838,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
if (VectorizingEpilogue)
VPlanTransforms::removeDeadRecipes(BestVPlan);

// Only use noalias metadata when using memory checks guaranteeing no overlap
// across all iterations.
const LoopAccessInfo *LAI = Legal->getLAI();
std::unique_ptr<LoopVersioning> LVer = nullptr;
if (LAI && !LAI->getRuntimePointerChecking()->getChecks().empty() &&
!LAI->getRuntimePointerChecking()->getDiffChecks()) {

// We currently don't use LoopVersioning for the actual loop cloning but we
// still use it to add the noalias metadata.
// TODO: Find a better way to re-use LoopVersioning functionality to add
// metadata.
LVer = std::make_unique<LoopVersioning>(
*LAI, LAI->getRuntimePointerChecking()->getChecks(), OrigLoop, LI, DT,
PSE.getSE());
State.LVer = &*LVer;
State.LVer->prepareNoAliasMetadata();
}

ILV.printDebugTracesAtStart();

//===------------------------------------------------===//
Expand Down Expand Up @@ -8468,11 +8450,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
}
if (LoadInst *Load = dyn_cast<LoadInst>(I))
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
I->getDebugLoc());
VPIRMetadata(*Load, LVer), I->getDebugLoc());

StoreInst *Store = cast<StoreInst>(I);
return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
Reverse, I->getDebugLoc());
Reverse, VPIRMetadata(*Store, LVer),
I->getDebugLoc());
}

/// Creates a VPWidenIntOrFpInductionRecipe for \p Phi. If needed, it will also
Expand Down Expand Up @@ -8845,7 +8828,8 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
assert((Range.Start.isScalar() || !IsUniform || !IsPredicated ||
(Range.Start.isScalable() && isa<IntrinsicInst>(I))) &&
"Should not predicate a uniform recipe");
auto *Recipe = new VPReplicateRecipe(I, Operands, IsUniform, BlockInMask);
auto *Recipe = new VPReplicateRecipe(I, Operands, IsUniform, BlockInMask,
VPIRMetadata(*I, LVer));
return Recipe;
}

Expand Down Expand Up @@ -9092,10 +9076,20 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
ElementCount MaxVF) {
assert(OrigLoop->isInnermost() && "Inner loop expected.");

const LoopAccessInfo *LAI = Legal->getLAI();
LoopVersioning LVer(*LAI, LAI->getRuntimePointerChecking()->getChecks(),
OrigLoop, LI, DT, PSE.getSE());
if (!LAI->getRuntimePointerChecking()->getChecks().empty() &&
!LAI->getRuntimePointerChecking()->getDiffChecks()) {
// Only use noalias metadata when using memory checks guaranteeing no
// overlap across all iterations.
LVer.prepareNoAliasMetadata();
}

auto MaxVFTimes2 = MaxVF * 2;
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
VFRange SubRange = {VF, MaxVFTimes2};
if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange)) {
if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, &LVer)) {
bool HasScalarVF = Plan->hasScalarVFOnly();
// Now optimize the initial VPlan.
if (!HasScalarVF)
Expand Down Expand Up @@ -9357,7 +9351,8 @@ static void addExitUsersForFirstOrderRecurrences(
}

VPlanPtr
LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
LoopVersioning *LVer) {

using namespace llvm::VPlanPatternMatch;
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
Expand Down Expand Up @@ -9413,7 +9408,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
}

VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
Builder);
Builder, LVer);

// ---------------------------------------------------------------------------
// Pre-construction: record ingredients whose recipes we'll need to further
Expand Down Expand Up @@ -9520,7 +9515,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// Only create recipe for the final invariant store of the reduction.
if (Legal->isInvariantStoreOfReduction(SI)) {
auto *Recipe =
new VPReplicateRecipe(SI, R.operands(), true /* IsUniform */);
new VPReplicateRecipe(SI, R.operands(), true /* IsUniform */,
nullptr /*Mask*/, VPIRMetadata(*SI, LVer));
Recipe->insertBefore(*MiddleVPBB, MBIP);
}
R.eraseFromParent();
Expand Down Expand Up @@ -9702,7 +9698,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
// Collect mapping of IR header phis to header phi recipes, to be used in
// addScalarResumePhis.
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
Builder);
Builder, nullptr /*LVer*/);
for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
if (isa<VPCanonicalIVPHIRecipe>(&R))
continue;
Expand Down
9 changes: 7 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ class VPRecipeBuilder {
/// A mapping of partial reduction exit instructions to their scaling factor.
DenseMap<const Instruction *, unsigned> ScaledReductionMap;

/// Loop versioning instance for getting noalias metadata guaranteed by
/// runtime checks.
LoopVersioning *LVer;

/// Check if \p I can be widened at the start of \p Range and possibly
/// decrease the range such that the returned value holds for the entire \p
/// Range. The function should not be called for memory instructions or calls.
Expand Down Expand Up @@ -155,9 +159,10 @@ class VPRecipeBuilder {
const TargetTransformInfo *TTI,
LoopVectorizationLegality *Legal,
LoopVectorizationCostModel &CM,
PredicatedScalarEvolution &PSE, VPBuilder &Builder)
PredicatedScalarEvolution &PSE, VPBuilder &Builder,
LoopVersioning *LVer)
: Plan(Plan), OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal),
CM(CM), PSE(PSE), Builder(Builder) {}
CM(CM), PSE(PSE), Builder(Builder), LVer(LVer) {}

std::optional<unsigned> getScalingForReduction(const Instruction *ExitInst) {
auto It = ScaledReductionMap.find(ExitInst);
Expand Down
12 changes: 2 additions & 10 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,8 @@ VPTransformState::VPTransformState(const TargetTransformInfo *TTI,
IRBuilderBase &Builder, VPlan *Plan,
Loop *CurrentParentLoop, Type *CanonicalIVTy)
: TTI(TTI), VF(VF), CFG(DT), LI(LI), AC(AC), Builder(Builder), Plan(Plan),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the removal of AC initialization related?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added back; it was dropped accidentally, thanks.

CurrentParentLoop(CurrentParentLoop), LVer(nullptr),
TypeAnalysis(CanonicalIVTy), VPDT(*Plan) {}
CurrentParentLoop(CurrentParentLoop), TypeAnalysis(CanonicalIVTy),
VPDT(*Plan) {}

Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) {
if (Def->isLiveIn())
Expand Down Expand Up @@ -350,14 +350,6 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
return VectorValue;
}

void VPTransformState::addNewMetadata(Instruction *To,
const Instruction *Orig) {
// If the loop was versioned with memchecks, add the corresponding no-alias
// metadata.
if (LVer && isa<LoadInst, StoreInst>(Orig))
LVer->annotateInstWithNoAlias(To, Orig);
}

void VPTransformState::setDebugLocFrom(DebugLoc DL) {
const DILocation *DIL = DL;
// When a FSDiscriminator is enabled, we don't need to add the multiply
Expand Down
47 changes: 31 additions & 16 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class VPReplicateRecipe;
class VPlanSlp;
class Value;
class LoopVectorizationCostModel;
class LoopVersioning;

struct VPCostContext;

Expand Down Expand Up @@ -1236,11 +1237,20 @@ struct VPIRPhi : public VPIRInstruction {
class VPIRMetadata {
SmallVector<std::pair<unsigned, MDNode *>> Metadata;

protected:
public:
VPIRMetadata() {}

/// Adds metadata that can be preserved from the original instruction
/// \p I.
VPIRMetadata(Instruction &I) { getMetadataToPropagate(&I, Metadata); }

public:
/// Adds metadata that can be preserved from the original instruction
/// \p I and noalias metadata guaranteed by runtime checks using \p LVer.
VPIRMetadata(Instruction &I, LoopVersioning *LVer);

/// Copy constructor for cloning.
VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {}
/// Copy constructor for cloning.
VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added thanks


/// Add all metadata to \p I.
void applyMetadata(Instruction &I) const;
};
Expand Down Expand Up @@ -2511,7 +2521,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to be
/// uniform only one copy, per lane zero, will be generated.
class VPReplicateRecipe : public VPRecipeWithIRFlags {
class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should replication of loads/stores have a separate recipe, if only they hold metadata? Would that separation be helpful in general?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to break up the recipes, to remove the reliance on the underlying IR instruction. Other case to split off would probably be calls, but probably best done separately, as this will require quite a bit of work (currently it is all based on cloning the existing IR instruction).

VPReplicateRecipes other than loads should also have metadata; currently they use all metadata from the original instruction (due to cloning); this may be incorrect, as only certain IR metadata should be preserved (like widen-recipes, which only preserve a subset of the original metadata), again probably best to fix separately.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Commit message should mention that this patch also affects how ReplicateRecipe handle metadata. Does it fix the possibly incorrect behavior mentioned above? A test may be helpful.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't fix it yet. It doesn't change the behavior, as the existing cloning of instructions will preserve all metadata; the noalias MD is now added by VPIRMetadata.

/// Indicator if only a single replica per lane is needed.
bool IsUniform;

Expand All @@ -2520,9 +2530,10 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags {

public:
VPReplicateRecipe(Instruction *I, ArrayRef<VPValue *> Operands,
bool IsUniform, VPValue *Mask = nullptr)
bool IsUniform, VPValue *Mask = nullptr,
VPIRMetadata Metadata = {})
: VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
IsUniform(IsUniform), IsPredicated(Mask) {
VPIRMetadata(Metadata), IsUniform(IsUniform), IsPredicated(Mask) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be

Suggested change
VPIRMetadata(Metadata), IsUniform(IsUniform), IsPredicated(Mask) {
VPIRMetadata(I, Metadata), IsUniform(IsUniform), IsPredicated(Mask) {

to propagate I's metadata when Metadata is empty, as in the default?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated to use VPIRMetadata(I, LVer), to be passed in

if (Mask)
addOperand(Mask);
}
Expand All @@ -2532,7 +2543,7 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags {
VPReplicateRecipe *clone() override {
auto *Copy =
new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
isPredicated() ? getMask() : nullptr);
isPredicated() ? getMask() : nullptr, *this);
Copy->transferFlags(*this);
return Copy;
}
Expand Down Expand Up @@ -2692,8 +2703,9 @@ class VPWidenMemoryRecipe : public VPRecipeBase, public VPIRMetadata {

VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
std::initializer_list<VPValue *> Operands,
bool Consecutive, bool Reverse, DebugLoc DL)
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(I), Ingredient(I),
bool Consecutive, bool Reverse,
const VPIRMetadata &Metadata, DebugLoc DL)
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
Consecutive(Consecutive), Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
}
Expand Down Expand Up @@ -2751,16 +2763,17 @@ class VPWidenMemoryRecipe : public VPRecipeBase, public VPIRMetadata {
/// optional mask.
struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
bool Consecutive, bool Reverse, DebugLoc DL)
bool Consecutive, bool Reverse,
const VPIRMetadata &Metadata, DebugLoc DL)
: VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
Reverse, DL),
Reverse, Metadata, DL),
VPValue(this, &Load) {
setMask(Mask);
}

VPWidenLoadRecipe *clone() override {
return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
getMask(), Consecutive, Reverse,
getMask(), Consecutive, Reverse, *this,
getDebugLoc());
}

Expand Down Expand Up @@ -2792,7 +2805,7 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
: VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
{L.getAddr(), &EVL}, L.isConsecutive(),
L.isReverse(), L.getDebugLoc()),
L.isReverse(), L, L.getDebugLoc()),
VPValue(this, &getIngredient()) {
setMask(Mask);
}
Expand Down Expand Up @@ -2829,16 +2842,17 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
/// to store to and an optional mask.
struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
VPValue *Mask, bool Consecutive, bool Reverse,
const VPIRMetadata &Metadata, DebugLoc DL)
: VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
Consecutive, Reverse, DL) {
Consecutive, Reverse, Metadata, DL) {
setMask(Mask);
}

VPWidenStoreRecipe *clone() override {
return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
getStoredValue(), getMask(), Consecutive,
Reverse, getDebugLoc());
Reverse, *this, getDebugLoc());
}

VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
Expand Down Expand Up @@ -2872,7 +2886,8 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
: VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
{S.getAddr(), S.getStoredValue(), &EVL},
S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
S.isConsecutive(), S.isReverse(), S,
S.getDebugLoc()) {
setMask(Mask);
}

Expand Down
15 changes: 0 additions & 15 deletions llvm/lib/Transforms/Vectorize/VPlanHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ class VPBasicBlock;
class VPRegionBlock;
class VPlan;
class Value;
class LoopVersioning;

/// Returns a calculation for the total number of elements for a given \p VF.
/// For fixed width vectors this value is a constant, whereas for scalable
Expand Down Expand Up @@ -284,13 +283,6 @@ struct VPTransformState {
Iter->second[CacheIdx] = V;
}

/// Add additional metadata to \p To that was not present on \p Orig.
///
/// Currently this is used to add the noalias annotations based on the
/// inserted memchecks. Use this for instructions that are *cloned* into the
/// vector loop.
void addNewMetadata(Instruction *To, const Instruction *Orig);

/// Set the debug location in the builder using the debug location \p DL.
void setDebugLocFrom(DebugLoc DL);

Expand Down Expand Up @@ -339,13 +331,6 @@ struct VPTransformState {
/// The parent loop object for the current scope, or nullptr.
Loop *CurrentParentLoop = nullptr;

/// LoopVersioning. It's only set up (non-null) if memchecks were
/// used.
///
/// This is currently only used to add no-alias metadata based on the
/// memchecks. The actual versioning is performed manually.
LoopVersioning *LVer = nullptr;

/// VPlan-based type analysis.
VPTypeAnalysis TypeAnalysis;

Expand Down
Loading
Loading