Skip to content

[VPlan] Add new VPIRPhi overlay for VPIRInsts wrapping phi nodes (NFC). #129387

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 11 additions & 13 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9055,14 +9055,14 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
VPValue *OneVPV = Plan.getOrAddLiveIn(
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
for (VPRecipeBase &ScalarPhiR : *Plan.getScalarHeader()) {
auto *ScalarPhiIRI = cast<VPIRInstruction>(&ScalarPhiR);
auto *ScalarPhiI = dyn_cast<PHINode>(&ScalarPhiIRI->getInstruction());
if (!ScalarPhiI)
auto *ScalarPhiIRI = dyn_cast<VPIRPhi>(&ScalarPhiR);
if (!ScalarPhiIRI)
break;

// TODO: Extract final value from induction recipe initially, optimize to
// pre-computed end value together in optimizeInductionExitUsers.
auto *VectorPhiR = cast<VPHeaderPHIRecipe>(Builder.getRecipe(ScalarPhiI));
auto *VectorPhiR =
cast<VPHeaderPHIRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
Expand Down Expand Up @@ -9112,20 +9112,19 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
continue;

for (VPRecipeBase &R : *ExitVPBB) {
auto *ExitIRI = dyn_cast<VPIRInstruction>(&R);
auto *ExitIRI = dyn_cast<VPIRPhi>(&R);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Supporting non VPIRInstruction recipes introduced inside ExitVPBB, which admittedly has yet to be exercised, is dropped? Worth a comment?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep this was intentional and matches behavior in other places, for now there are only IR phis to handle.

if (!ExitIRI)
continue;
auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
if (!ExitPhi)
break;
if (ExitVPBB->getSinglePredecessor() != Plan.getMiddleBlock()) {
assert(ExitIRI->getNumOperands() ==
ExitVPBB->getPredecessors().size() &&
"early-exit must update exit values on construction");
continue;
}

PHINode &ExitPhi = ExitIRI->getIRPhi();
BasicBlock *ExitingBB = OrigLoop->getLoopLatch();
Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
Value *IncomingValue = ExitPhi.getIncomingValueForBlock(ExitingBB);
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
ExitIRI->addOperand(V);
if (V->isLiveIn())
Expand Down Expand Up @@ -10318,11 +10317,10 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
}
for (VPRecipeBase &R : make_early_inc_range(*MainPlan.getScalarHeader())) {
auto *VPIRInst = cast<VPIRInstruction>(&R);
auto *IRI = dyn_cast<PHINode>(&VPIRInst->getInstruction());
if (!IRI)
auto *VPIRInst = dyn_cast<VPIRPhi>(&R);
if (!VPIRInst)
break;
if (EpiWidenedPhis.contains(IRI))
if (EpiWidenedPhis.contains(&VPIRInst->getIRPhi()))
continue;
// There is no corresponding wide induction in the epilogue plan that would
// need a resume value. Remove the VPIRInst wrapping the scalar header phi
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1225,7 +1225,7 @@ VPIRBasicBlock *VPlan::createVPIRBasicBlock(BasicBlock *IRBB) {
auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB);
for (Instruction &I :
make_range(IRBB->begin(), IRBB->getTerminator()->getIterator()))
VPIRBB->appendRecipe(new VPIRInstruction(I));
VPIRBB->appendRecipe(VPIRInstruction::create(I));
return VPIRBB;
}

Expand Down
37 changes: 33 additions & 4 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1026,22 +1026,28 @@ class VPInstruction : public VPRecipeWithIRFlags,
};

/// A recipe to wrap on original IR instruction not to be modified during
/// execution, execept for PHIs. For PHIs, a single VPValue operand is allowed,
/// and it is used to add a new incoming value for the single predecessor VPBB.
/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
/// Expect PHIs, VPIRInstructions cannot have any operands.
class VPIRInstruction : public VPRecipeBase {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The above documentation deserves an update.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated thanks

Instruction &I;

public:
protected:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth documenting that VPIRInstructions should be constructed by calling create() rather than the constructor directly, as a subclass may potentially be created.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done thanks!

/// VPIRInstruction::create() should be used to create VPIRInstructions, as
/// subclasses may need to be created, e.g. VPIRPhi.
VPIRInstruction(Instruction &I)
: VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}

public:
~VPIRInstruction() override = default;

/// Create a new VPIRPhi for \p \I, if it is a PHINode, otherwise create a
/// VPIRInstruction.
static VPIRInstruction *create(Instruction &I);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth documenting create().

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done thanks


VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)

VPIRInstruction *clone() override {
auto *R = new VPIRInstruction(I);
auto *R = create(I);
for (auto *Op : operands())
R->addOperand(Op);
return R;
Expand Down Expand Up @@ -1085,6 +1091,29 @@ class VPIRInstruction : public VPRecipeBase {
void extractLastLaneOfOperand(VPBuilder &Builder);
};

/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
/// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
/// allowed, and it is used to add a new incoming value for the single
/// predecessor VPBB.
struct VPIRPhi : public VPIRInstruction {
VPIRPhi(PHINode &PN) : VPIRInstruction(PN) {}

static inline bool classof(const VPRecipeBase *U) {
auto *R = dyn_cast<VPIRInstruction>(U);
return R && isa<PHINode>(R->getInstruction());
Comment on lines +1102 to +1103
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is needed because VPIRPhi reuses VPIRInstruction's VPDEF::VPIRInstructionSC rather than defining its own?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep

}

PHINode &getIRPhi() { return cast<PHINode>(getInstruction()); }

void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
};

/// VPWidenRecipe is a recipe for producing a widened instruction using the
/// opcode and operands of the recipe. This recipe covers most of the
/// traditional vectorization cases where each recipe transforms into a
Expand Down
65 changes: 42 additions & 23 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1061,30 +1061,15 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
}
#endif

void VPIRInstruction::execute(VPTransformState &State) {
assert((isa<PHINode>(&I) || getNumOperands() == 0) &&
"Only PHINodes can have extra operands");
for (const auto &[Idx, Op] : enumerate(operands())) {
VPValue *ExitValue = Op;
auto Lane = vputils::isUniformAfterVectorization(ExitValue)
? VPLane::getFirstLane()
: VPLane::getLastLaneForVF(State.VF);
VPBlockBase *Pred = getParent()->getPredecessors()[Idx];
auto *PredVPBB = Pred->getExitingBasicBlock();
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
// Set insertion point in PredBB in case an extract needs to be generated.
// TODO: Model extracts explicitly.
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
Value *V = State.get(ExitValue, VPLane(Lane));
auto *Phi = cast<PHINode>(&I);
// If there is no existing block for PredBB in the phi, add a new incoming
// value. Otherwise update the existing incoming value for PredBB.
if (Phi->getBasicBlockIndex(PredBB) == -1)
Phi->addIncoming(V, PredBB);
else
Phi->setIncomingValueForBlock(PredBB, V);
}
VPIRInstruction *VPIRInstruction ::create(Instruction &I) {
if (auto *Phi = dyn_cast<PHINode>(&I))
return new VPIRPhi(*Phi);
return new VPIRInstruction(I);
}

void VPIRInstruction::execute(VPTransformState &State) {
assert(!isa<VPIRPhi>(this) && getNumOperands() == 0 &&
"PHINodes must be handled by VPIRPhi");
// Advance the insert point after the wrapped IR instruction. This allows
// interleaving VPIRInstructions and other recipes.
State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
Expand Down Expand Up @@ -1117,6 +1102,40 @@ void VPIRInstruction::extractLastLaneOfOperand(VPBuilder &Builder) {
void VPIRInstruction::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "IR " << I;
}
#endif

void VPIRPhi::execute(VPTransformState &State) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are VPIRPhi recipes expected to have at-least one operand?

Suggested change
void VPIRPhi::execute(VPTransformState &State) {
void VPIRPhi::execute(VPTransformState &State) {
assert(getNumOperands() > 0 && "VPIRPhi recipes must have operands");

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are still cases we create VPIRPhi without operands, e.g. if the entry block contains existing lcssa phis, which do not need updating

PHINode *Phi = &getIRPhi();
for (const auto &[Idx, Op] : enumerate(operands())) {
VPValue *ExitValue = Op;
auto Lane = vputils::isUniformAfterVectorization(ExitValue)
? VPLane::getFirstLane()
: VPLane::getLastLaneForVF(State.VF);
VPBlockBase *Pred = getParent()->getPredecessors()[Idx];
auto *PredVPBB = Pred->getExitingBasicBlock();
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
// Set insertion point in PredBB in case an extract needs to be generated.
// TODO: Model extracts explicitly.
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
Value *V = State.get(ExitValue, VPLane(Lane));
// If there is no existing block for PredBB in the phi, add a new incoming
// value. Otherwise update the existing incoming value for PredBB.
if (Phi->getBasicBlockIndex(PredBB) == -1)
Phi->addIncoming(V, PredBB);
else
Phi->setIncomingValueForBlock(PredBB, V);
}

// Advance the insert point after the wrapped IR instruction. This allows
// interleaving VPIRInstructions and other recipes.
State.Builder.SetInsertPoint(Phi->getParent(), std::next(Phi->getIterator()));
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPIRPhi::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
VPIRInstruction::print(O, Indent, SlotTracker);

if (getNumOperands() != 0) {
O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": ";
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This now indicates that the recipe is a VPIRPhi?
Must a VPIRPhi have at-least one operand?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are still cases we create VPIRPhi without operands, e.g. if the entry block contains existing lcssa phis, which do not need updating

Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -798,8 +798,8 @@ void VPlanTransforms::optimizeInductionExitUsers(
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
for (VPRecipeBase &R : *ExitVPBB) {
auto *ExitIRI = cast<VPIRInstruction>(&R);
if (!isa<PHINode>(ExitIRI->getInstruction()))
auto *ExitIRI = dyn_cast<VPIRPhi>(&R);
if (!ExitIRI)
break;

for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {
Expand Down Expand Up @@ -2155,20 +2155,20 @@ void VPlanTransforms::handleUncountableEarlyExit(
VPBuilder MiddleBuilder(NewMiddle);
VPBuilder EarlyExitB(VectorEarlyExitVPBB);
for (VPRecipeBase &R : *VPEarlyExitBlock) {
auto *ExitIRI = cast<VPIRInstruction>(&R);
auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
if (!ExitPhi)
auto *ExitIRI = dyn_cast<VPIRPhi>(&R);
if (!ExitIRI)
break;

PHINode &ExitPhi = ExitIRI->getIRPhi();
VPValue *IncomingFromEarlyExit = RecipeBuilder.getVPValueOrAddLiveIn(
ExitPhi->getIncomingValueForBlock(UncountableExitingBlock));
ExitPhi.getIncomingValueForBlock(UncountableExitingBlock));

if (OrigLoop->getUniqueExitBlock()) {
// If there's a unique exit block, VPEarlyExitBlock has 2 predecessors
// (MiddleVPBB and NewMiddle). Add the incoming value from MiddleVPBB
// which is coming from the original latch.
VPValue *IncomingFromLatch = RecipeBuilder.getVPValueOrAddLiveIn(
ExitPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
ExitPhi.getIncomingValueForBlock(OrigLoop->getLoopLatch()));
ExitIRI->addOperand(IncomingFromLatch);
ExitIRI->extractLastLaneOfOperand(MiddleBuilder);
}
Expand Down
5 changes: 2 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,8 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
auto *UI = cast<VPRecipeBase>(U);
// TODO: check dominance of incoming values for phis properly.
if (!UI ||
isa<VPHeaderPHIRecipe, VPWidenPHIRecipe, VPPredInstPHIRecipe>(UI) ||
(isa<VPIRInstruction>(UI) &&
isa<PHINode>(cast<VPIRInstruction>(UI)->getInstruction())) ||
isa<VPHeaderPHIRecipe, VPWidenPHIRecipe, VPPredInstPHIRecipe,
VPIRPhi>(UI) ||
(isa<VPInstruction>(UI) &&
cast<VPInstruction>(UI)->getOpcode() == Instruction::PHI))
continue;
Expand Down