Skip to content

[VPlan] Add getSCEVExprForVPValue util, use to get trip count SCEV (NFC) #94464

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Sep 18, 2024
26 changes: 9 additions & 17 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -905,15 +905,6 @@ Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF) {
return B.CreateElementCount(Ty, VF);
}

const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
Loop *OrigLoop) {
const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && "Invalid loop count");

ScalarEvolution &SE = *PSE.getSE();
return SE.getTripCountFromExitCount(BackedgeTakenCount, IdxTy, OrigLoop);
}

void reportVectorizationFailure(const StringRef DebugMsg,
const StringRef OREMsg, const StringRef ORETag,
OptimizationRemarkEmitter *ORE, Loop *TheLoop,
Expand Down Expand Up @@ -4750,7 +4741,10 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
if (!MainLoopVF.isScalable() && !NextVF.Width.isScalable()) {
// TODO: extend to support scalable VFs.
if (!RemainingIterations) {
const SCEV *TC = createTripCountSCEV(TCType, PSE, OrigLoop);
const SCEV *TC = vputils::getSCEVExprForVPValue(
getPlanFor(NextVF.Width).getTripCount(), SE);
assert(!isa<SCEVCouldNotCompute>(TC) &&
"Trip count SCEV must be computable");
RemainingIterations = SE.getURemExpr(
TC, SE.getConstant(TCType, MainLoopVF.getKnownMinValue() * IC));
}
Expand Down Expand Up @@ -8863,10 +8857,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
return !CM.requiresScalarEpilogue(VF.isVector());
},
Range);
VPlanPtr Plan = VPlan::createInitialVPlan(
createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop),
*PSE.getSE(), RequiresScalarEpilogueCheck, CM.foldTailByMasking(),
OrigLoop);
VPlanPtr Plan = VPlan::createInitialVPlan(Legal->getWidestInductionType(),
PSE, RequiresScalarEpilogueCheck,
CM.foldTailByMasking(), OrigLoop);

// Don't use getDecisionAndClampRange here, because we don't know the UF
// so this function is better to be conservative, rather than to split
Expand Down Expand Up @@ -9081,9 +9074,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");

// Create new empty VPlan
auto Plan = VPlan::createInitialVPlan(
createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop),
*PSE.getSE(), true, false, OrigLoop);
auto Plan = VPlan::createInitialVPlan(Legal->getWidestInductionType(), PSE,
true, false, OrigLoop);

// Build hierarchical CFG
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
Expand Down
11 changes: 10 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -869,14 +869,23 @@ static VPIRBasicBlock *createVPIRBasicBlockFor(BasicBlock *BB) {
return VPIRBB;
}

VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE,
VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
PredicatedScalarEvolution &PSE,
bool RequiresScalarEpilogueCheck,
bool TailFolded, Loop *TheLoop) {
VPIRBasicBlock *Entry = createVPIRBasicBlockFor(TheLoop->getLoopPreheader());
VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader);

// Create SCEV and VPValue for the trip count.
const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && "Invalid loop count");
ScalarEvolution &SE = *PSE.getSE();
const SCEV *TripCount =
SE.getTripCountFromExitCount(BackedgeTakenCount, InductionTy, TheLoop);
Plan->TripCount =
vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE);

// Create VPRegionBlock, with empty header and latch blocks, to be filled
// during processing later.
VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body");
Expand Down
9 changes: 4 additions & 5 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,6 @@ Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
int64_t Step);

const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
Loop *CurLoop = nullptr);

/// A helper function that returns the reciprocal of the block probability of
/// predicated blocks. If we return X, we are assuming the predicated block
/// will execute once for every X iterations of the loop header.
Expand Down Expand Up @@ -3477,8 +3474,10 @@ class VPlan {
/// middle VPBasicBlock. If a check is needed to guard executing the scalar
/// epilogue loop, it will be added to the middle block, together with
/// VPBasicBlocks for the scalar preheader and exit blocks.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: would be good to explain what IdxTy stands for, and/or use a more descriptive name.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated to InductionTy and documented, thanks!

static VPlanPtr createInitialVPlan(const SCEV *TripCount,
ScalarEvolution &PSE,
/// \p InductionTy is the type of the canonical induction and used for related
/// values, like the trip count expression.
static VPlanPtr createInitialVPlan(Type *InductionTy,
PredicatedScalarEvolution &PSE,
bool RequiresScalarEpilogueCheck,
bool TailFolded, Loop *TheLoop);

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/GenericDomTreeConstruction.h"
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -685,10 +685,11 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
m_BranchOnCond(m_Not(m_ActiveLaneMask(m_VPValue(), m_VPValue())))))
return;

Type *IdxTy =
Plan.getCanonicalIV()->getStartValue()->getLiveInIRValue()->getType();
const SCEV *TripCount = createTripCountSCEV(IdxTy, PSE);
ScalarEvolution &SE = *PSE.getSE();
const SCEV *TripCount =
vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW, independently - BOC below would better use an unconditional branch than BranchOnCond with true condition.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, will also entail to remove the region and header phis.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth asserting that the SCEV returned could be computed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, added assert.

assert(!isa<SCEVCouldNotCompute>(TripCount) &&
"Trip count SCEV must be computable");
ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF);
const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements);
if (TripCount->isZero() ||
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "VPlanUtils.h"
#include "VPlanPatternMatch.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"

using namespace llvm;
Expand Down Expand Up @@ -60,3 +61,14 @@ bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) {
return match(V, m_Binary<Instruction::ICmp>(m_VPValue(A), m_VPValue(B))) &&
IsWideCanonicalIV(A) && B == Plan.getOrCreateBackedgeTakenCount();
}

const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
if (V->isLiveIn())
return SE.getSCEV(V->getLiveInIRValue());

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to work well for Values at VPlan's boundary: can complement live-ins with live-out/VPIRInstructions which could also simply retrieve SE.getSCEV() of the instruction they wrap; as follow-up, along with concrete use.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep

// TODO: Support constructing SCEVs for more recipes as needed.
return TypeSwitch<const VPRecipeBase *, const SCEV *>(V->getDefiningRecipe())
.Case<VPExpandSCEVRecipe>(
[](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
.Default([&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });
}
9 changes: 9 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@

#include "VPlan.h"

namespace llvm {
class ScalarEvolution;
class SCEV;
} // namespace llvm

namespace llvm::vputils {
/// Returns true if only the first lane of \p Def is used.
bool onlyFirstLaneUsed(const VPValue *Def);
Expand All @@ -26,6 +31,10 @@ bool onlyFirstPartUsed(const VPValue *Def);
VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
ScalarEvolution &SE);

/// Return the SCEV expression for \p V. Returns SCEVCouldNotCompute if no
/// SCEV expression could be constructed.
const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);

/// Returns true if \p VPV is uniform after vectorization.
inline bool isUniformAfterVectorization(const VPValue *VPV) {
// A value defined outside the vector region must be uniform after
Expand Down
7 changes: 3 additions & 4 deletions llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,12 @@ TEST_F(VPlanHCFGTest, testBuildHCFGInnerLoop) {
raw_string_ostream OS(FullDump);
Plan->printDOT(OS);
const char *ExpectedStr = R"(digraph VPlan {
graph [labelloc=t, fontsize=30; label="Vectorization Plan\n for UF\>=1\nLive-in vp\<%0\> = vector-trip-count\nvp\<%1\> = original trip-count\n"]
graph [labelloc=t, fontsize=30; label="Vectorization Plan\n for UF\>=1\nLive-in vp\<%0\> = vector-trip-count\nLive-in ir\<%N\> = original trip-count\n"]
node [shape=rect, fontname=Courier, fontsize=30]
edge [fontname=Courier, fontsize=30]
compound=true
N0 [label =
"ir-bb\<entry\>:\l" +
" EMIT vp\<%1\> = EXPAND SCEV (-1 + %N)\l" +
"No successors\l"
]
N1 [label =
Expand All @@ -134,8 +133,8 @@ compound=true
N2 -> N4 [ label="" ltail=cluster_N3]
N4 [label =
"middle.block:\l" +
" EMIT vp\<%2\> = icmp eq vp\<%1\>, vp\<%0\>\l" +
" EMIT branch-on-cond vp\<%2\>\l" +
" EMIT vp\<%1\> = icmp eq ir\<%N\>, vp\<%0\>\l" +
" EMIT branch-on-cond vp\<%1\>\l" +
"Successor(s): ir-bb\<for.end\>, scalar.ph\l"
]
N4 -> N5 [ label="T"]
Expand Down
18 changes: 10 additions & 8 deletions llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,11 @@ class VPlanTestBase : public testing::Test {
assert(!verifyFunction(F) && "input function must be valid");
doAnalysis(F);

auto Plan = VPlan::createInitialVPlan(
SE->getBackedgeTakenCount(LI->getLoopFor(LoopHeader)), *SE, true, false,
LI->getLoopFor(LoopHeader));
VPlanHCFGBuilder HCFGBuilder(LI->getLoopFor(LoopHeader), LI.get(), *Plan);
Loop *L = LI->getLoopFor(LoopHeader);
PredicatedScalarEvolution PSE(*SE, *L);
auto Plan = VPlan::createInitialVPlan(IntegerType::get(*Ctx, 64), PSE, true,
false, L);
VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan);
HCFGBuilder.buildHierarchicalCFG();
return Plan;
}
Expand All @@ -81,10 +82,11 @@ class VPlanTestBase : public testing::Test {
assert(!verifyFunction(F) && "input function must be valid");
doAnalysis(F);

auto Plan = VPlan::createInitialVPlan(
SE->getBackedgeTakenCount(LI->getLoopFor(LoopHeader)), *SE, true, false,
LI->getLoopFor(LoopHeader));
VPlanHCFGBuilder HCFGBuilder(LI->getLoopFor(LoopHeader), LI.get(), *Plan);
Loop *L = LI->getLoopFor(LoopHeader);
PredicatedScalarEvolution PSE(*SE, *L);
auto Plan = VPlan::createInitialVPlan(IntegerType::get(*Ctx, 64), PSE, true,
false, L);
VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan);
HCFGBuilder.buildPlainCFG();
return Plan;
}
Expand Down