Skip to content

[VPlan] Use VPPhiAccessors for VPIRPhi. #129389

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 13 additions & 17 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3058,11 +3058,9 @@ void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
PHINode *NewPhi = cast<PHINode>(State.get(VPPhi));
// Make sure the builder has a valid insert point.
Builder.SetInsertPoint(NewPhi);
for (unsigned Idx = 0; Idx < VPPhi->getNumOperands(); ++Idx) {
VPValue *Inc = VPPhi->getIncomingValue(Idx);
VPBasicBlock *VPBB = VPPhi->getIncomingBlock(Idx);

for (const auto &[Inc, VPBB] : VPPhi->incoming_values_and_blocks())
NewPhi->addIncoming(State.get(Inc), State.CFG.VPBB2IRBB[VPBB]);
}
}
}
}
Expand Down Expand Up @@ -9074,14 +9072,14 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
VPValue *OneVPV = Plan.getOrAddLiveIn(
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
for (VPRecipeBase &ScalarPhiR : *Plan.getScalarHeader()) {
auto *ScalarPhiIRI = cast<VPIRInstruction>(&ScalarPhiR);
auto *ScalarPhiI = dyn_cast<PHINode>(&ScalarPhiIRI->getInstruction());
if (!ScalarPhiI)
auto *ScalarPhiIRI = dyn_cast<VPIRPhi>(&ScalarPhiR);
if (!ScalarPhiIRI)
break;

// TODO: Extract final value from induction recipe initially, optimize to
// pre-computed end value together in optimizeInductionExitUsers.
auto *VectorPhiR = cast<VPHeaderPHIRecipe>(Builder.getRecipe(ScalarPhiI));
auto *VectorPhiR =
cast<VPHeaderPHIRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
Expand Down Expand Up @@ -9131,20 +9129,19 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
continue;

for (VPRecipeBase &R : *ExitVPBB) {
auto *ExitIRI = dyn_cast<VPIRInstruction>(&R);
auto *ExitIRI = dyn_cast<VPIRPhi>(&R);
if (!ExitIRI)
continue;
auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
if (!ExitPhi)
break;
if (ExitVPBB->getSinglePredecessor() != Plan.getMiddleBlock()) {
assert(ExitIRI->getNumOperands() ==
ExitVPBB->getPredecessors().size() &&
"early-exit must update exit values on construction");
continue;
}

PHINode &ExitPhi = ExitIRI->getIRPhi();
BasicBlock *ExitingBB = OrigLoop->getLoopLatch();
Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
Value *IncomingValue = ExitPhi.getIncomingValueForBlock(ExitingBB);
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
ExitIRI->addOperand(V);
if (V->isLiveIn())
Expand Down Expand Up @@ -10325,11 +10322,10 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
}
for (VPRecipeBase &R : make_early_inc_range(*MainPlan.getScalarHeader())) {
auto *VPIRInst = cast<VPIRInstruction>(&R);
auto *IRI = dyn_cast<PHINode>(&VPIRInst->getInstruction());
if (!IRI)
auto *VPIRInst = dyn_cast<VPIRPhi>(&R);
if (!VPIRInst)
break;
if (EpiWidenedPhis.contains(IRI))
if (EpiWidenedPhis.contains(&VPIRInst->getIRPhi()))
continue;
// There is no corresponding wide induction in the epilogue plan that would
// need a resume value. Remove the VPIRInst wrapping the scalar header phi
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1227,7 +1227,7 @@ VPIRBasicBlock *VPlan::createVPIRBasicBlock(BasicBlock *IRBB) {
auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB);
for (Instruction &I :
make_range(IRBB->begin(), IRBB->getTerminator()->getIterator()))
VPIRBB->appendRecipe(new VPIRInstruction(I));
VPIRBB->appendRecipe(VPIRInstruction::create(I));
return VPIRBB;
}

Expand Down
93 changes: 84 additions & 9 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1033,16 +1033,19 @@ class VPInstruction : public VPRecipeWithIRFlags,
class VPIRInstruction : public VPRecipeBase {
Instruction &I;

public:
protected:
VPIRInstruction(Instruction &I)
: VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}

public:
~VPIRInstruction() override = default;

static VPIRInstruction *create(Instruction &I);

VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)

VPIRInstruction *clone() override {
auto *R = new VPIRInstruction(I);
auto *R = create(I);
for (auto *Op : operands())
R->addOperand(Op);
return R;
Expand Down Expand Up @@ -1086,6 +1089,83 @@ class VPIRInstruction : public VPRecipeBase {
void extractLastLaneOfOperand(VPBuilder &Builder);
};

/// Mixin that equips phi-like recipes with accessors for their incoming values
/// and incoming blocks. RecipeTy must be a sub-class of VPRecipeBase; the mixin
/// reaches the recipe by down-casting itself to RecipeTy.
template <typename RecipeTy> class VPPhiAccessors {
  /// View this accessor object as the recipe it is mixed into.
  const VPRecipeBase *getAsRecipe() const {
    return static_cast<const RecipeTy *>(this);
  }

public:
  /// Iterator that maps successive indices to the matching incoming block.
  using const_incoming_block_iterator =
      mapped_iterator<detail::index_iterator,
                      std::function<const VPBasicBlock *(size_t)>>;
  using const_incoming_blocks_range =
      iterator_range<const_incoming_block_iterator>;

  /// Returns the incoming VPValue at position \p I, i.e. operand \p I of the
  /// recipe.
  VPValue *getIncomingValue(unsigned I) const {
    const VPRecipeBase *R = getAsRecipe();
    return R->getOperand(I);
  }

  /// Returns an iterator range over the incoming values (the operands of the
  /// recipe).
  VPUser::const_operand_range incoming_values() const {
    const VPRecipeBase *R = getAsRecipe();
    return R->operands();
  }

  /// Returns the incoming block at position \p Idx. Only declared here; the
  /// definition is supplied separately for each RecipeTy.
  const VPBasicBlock *getIncomingBlock(unsigned Idx) const;

  /// Iterator positioned at the first incoming block (index 0).
  const_incoming_block_iterator incoming_block_begin() const {
    auto BlockAt = [this](size_t Idx) { return getIncomingBlock(Idx); };
    return const_incoming_block_iterator(detail::index_iterator(0), BlockAt);
  }

  /// Iterator positioned one past the last incoming block.
  const_incoming_block_iterator incoming_block_end() const {
    const VPRecipeBase *R = getAsRecipe();
    // By default there is one incoming block per operand. Recipes with ID
    // VPWidenIntOrFpInductionSC are pinned to 2 incoming blocks regardless of
    // their operand count.
    // NOTE(review): presumably because they carry operands that are not
    // incoming values -- confirm.
    unsigned NumIncoming = R->getNumOperands();
    if (R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC)
      NumIncoming = 2;
    auto BlockAt = [this](size_t Idx) { return getIncomingBlock(Idx); };
    return const_incoming_block_iterator(detail::index_iterator(NumIncoming),
                                         BlockAt);
  }

  /// Returns an iterator range over the incoming blocks.
  const_incoming_blocks_range incoming_blocks() const {
    return make_range(incoming_block_begin(), incoming_block_end());
  }

  /// Returns an iterator range over pairs of incoming values and their
  /// corresponding incoming blocks. The range ends with the shorter of the two
  /// underlying ranges.
  detail::zippy<llvm::detail::zip_shortest, VPUser::const_operand_range,
                const_incoming_blocks_range>
  incoming_values_and_blocks() const {
    return zip(incoming_values(), incoming_blocks());
  }
};

/// Overlay for VPIRInstructions that wrap an IR PHINode. It lets such recipes
/// be used with isa/cast/dyn_cast and gives them their own execute()
/// implementation.
struct VPIRPhi : public VPIRInstruction, public VPPhiAccessors<VPIRPhi> {
  VPIRPhi(PHINode &PN) : VPIRInstruction(PN) {}

  /// A recipe is a VPIRPhi iff it is a VPIRInstruction whose wrapped
  /// instruction is a PHINode.
  static inline bool classof(const VPRecipeBase *U) {
    if (auto *IRRecipe = dyn_cast<VPIRInstruction>(U))
      return isa<PHINode>(IRRecipe->getInstruction());
    return false;
  }

  /// Returns the wrapped instruction as a PHINode.
  PHINode &getIRPhi() { return cast<PHINode>(getInstruction()); }

  void execute(VPTransformState &State) override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Print the recipe.
  void print(raw_ostream &O, const Twine &Indent,
             VPSlotTracker &SlotTracker) const override;
#endif
};

/// VPWidenRecipe is a recipe for producing a widened instruction using the
/// opcode and operands of the recipe. This recipe covers most of the
/// traditional vectorization cases where each recipe transforms into a
Expand Down Expand Up @@ -1923,7 +2003,8 @@ class VPScalarPHIRecipe : public VPHeaderPHIRecipe {
/// recipe is placed in an entry block to a (non-replicate) region, it must have
/// exactly 2 incoming values, the first from the predecessor of the region and
/// the second from the exiting block of the region.
class VPWidenPHIRecipe : public VPSingleDefRecipe {
class VPWidenPHIRecipe : public VPSingleDefRecipe,
public VPPhiAccessors<VPWidenPHIRecipe> {
/// Name to use for the generated IR instruction for the widened phi.
std::string Name;

Expand Down Expand Up @@ -1954,12 +2035,6 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe {
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif

/// Returns the \p I th incoming VPBasicBlock.
VPBasicBlock *getIncomingBlock(unsigned I);

/// Returns the \p I th incoming VPValue.
VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
};

/// A recipe for handling first-order recurrence phis. The start value is the
Expand Down
125 changes: 77 additions & 48 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -972,30 +972,15 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
}
#endif

void VPIRInstruction::execute(VPTransformState &State) {
assert((isa<PHINode>(&I) || getNumOperands() == 0) &&
"Only PHINodes can have extra operands");
for (const auto &[Idx, Op] : enumerate(operands())) {
VPValue *ExitValue = Op;
auto Lane = vputils::isUniformAfterVectorization(ExitValue)
? VPLane::getFirstLane()
: VPLane::getLastLaneForVF(State.VF);
VPBlockBase *Pred = getParent()->getPredecessors()[Idx];
auto *PredVPBB = Pred->getExitingBasicBlock();
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
// Set insertion point in PredBB in case an extract needs to be generated.
// TODO: Model extracts explicitly.
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
Value *V = State.get(ExitValue, VPLane(Lane));
auto *Phi = cast<PHINode>(&I);
// If there is no existing block for PredBB in the phi, add a new incoming
// value. Otherwise update the existing incoming value for PredBB.
if (Phi->getBasicBlockIndex(PredBB) == -1)
Phi->addIncoming(V, PredBB);
else
Phi->setIncomingValueForBlock(PredBB, V);
}
/// Factory for recipes wrapping the IR instruction \p I: produces a VPIRPhi
/// when \p I is a PHINode and a plain VPIRInstruction otherwise.
VPIRInstruction *VPIRInstruction::create(Instruction &I) {
  if (!isa<PHINode>(I))
    return new VPIRInstruction(I);
  return new VPIRPhi(cast<PHINode>(I));
}

void VPIRInstruction::execute(VPTransformState &State) {
assert(!isa<VPIRPhi>(this) && getNumOperands() == 0 &&
"PHINodes must be handled by VPIRPhi");
// Advance the insert point after the wrapped IR instruction. This allows
// interleaving VPIRInstructions and other recipes.
State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator()));
Expand Down Expand Up @@ -1028,15 +1013,74 @@ void VPIRInstruction::extractLastLaneOfOperand(VPBuilder &Builder) {
/// Print the recipe as \p Indent followed by "IR " and the wrapped IR
/// instruction. \p SlotTracker is not used here.
void VPIRInstruction::print(raw_ostream &O, const Twine &Indent,
                            VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "IR " << I;
}
#endif

/// Returns the VPBasicBlock from which the \p I th incoming value of the
/// phi-like recipe \p R arrives.
///
/// If the parent block of \p R has predecessors, this is the exiting basic
/// block of its \p I th predecessor. Otherwise \p R must sit in the entry block
/// of a non-replicate region, where only indices 0 and 1 are valid.
static const VPBasicBlock *getIncomingBlockForRecipe(const VPRecipeBase *R,
                                                     unsigned I) {
  const VPBasicBlock *Parent = R->getParent();

  // Common case: pick the incoming block straight from the predecessor list.
  if (Parent->getNumPredecessors() > 0) {
    const VPBlockBase *DirectPred = Parent->getPredecessors()[I];
    return DirectPred->getExitingBasicBlock();
  }

  auto *Region = Parent->getParent();
  assert(Region && !Region->isReplicator() && Region->getEntry() == Parent &&
         "must be in the entry block of a non-replicate region");
  assert(
      I < 2 &&
      (R->getNumOperands() == 2 || isa<VPWidenIntOrFpInductionRecipe>(R)) &&
      "when placed in an entry block, only 2 incoming blocks are available");

  // Index 1 refers to the region itself, whose exiting block feeds the phi
  // across the backedge; index 0 refers to the predecessor of the region.
  const VPBlockBase *Incoming = Region;
  if (I == 0)
    Incoming = Region->getSinglePredecessor();
  return Incoming->getExitingBasicBlock();
}

/// Incoming-block lookup for VPIRPhi; forwards to the shared
/// getIncomingBlockForRecipe helper.
template <>
const VPBasicBlock *
VPPhiAccessors<VPIRPhi>::getIncomingBlock(unsigned Idx) const {
  const VPRecipeBase *Self = getAsRecipe();
  return getIncomingBlockForRecipe(Self, Idx);
}

/// Wire the recipe's incoming values into the wrapped IR phi: for every
/// (incoming value, incoming block) pair, the IR value is either added as a new
/// incoming entry or replaces the existing entry for that block. Afterwards the
/// builder's insert point is moved to just after the phi.
void VPIRPhi::execute(VPTransformState &State) {
  PHINode &IRPhi = getIRPhi();
  for (const auto &[Incoming, FromVPBB] : incoming_values_and_blocks()) {
    // Values that are uniform after vectorization are read from the first
    // lane; all others from the last lane for the current VF.
    const bool IsUniform = vputils::isUniformAfterVectorization(Incoming);
    VPLane Lane = IsUniform ? VPLane::getFirstLane()
                            : VPLane::getLastLaneForVF(State.VF);
    BasicBlock *FromBB = State.CFG.VPBB2IRBB[FromVPBB];
    // An extract may have to be generated, so place the insertion point in
    // the incoming IR block first.
    // TODO: Model extracts explicitly.
    State.Builder.SetInsertPoint(FromBB, FromBB->getFirstNonPHIIt());
    Value *IRValue = State.get(Incoming, Lane);
    // Overwrite the incoming value if the phi already has an entry for
    // FromBB; otherwise append a new incoming value.
    if (IRPhi.getBasicBlockIndex(FromBB) != -1)
      IRPhi.setIncomingValueForBlock(FromBB, IRValue);
    else
      IRPhi.addIncoming(IRValue, FromBB);
  }

  // Move the insert point past the wrapped IR instruction so that
  // VPIRInstructions and other recipes can be interleaved.
  State.Builder.SetInsertPoint(IRPhi.getParent(),
                               std::next(IRPhi.getIterator()));
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPIRPhi::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
VPIRInstruction::print(O, Indent, SlotTracker);

if (getNumOperands() != 0) {
O << " (extra operand" << (getNumOperands() > 1 ? "s" : "") << ": ";
interleaveComma(
enumerate(operands()), O, [this, &O, &SlotTracker](auto Op) {
Op.value()->printAsOperand(O, SlotTracker);
O << " from ";
getParent()->getPredecessors()[Op.index()]->printAsOperand(O);
});
interleaveComma(incoming_values_and_blocks(), O, [&O, &SlotTracker](auto Op) {
std::get<0>(Op)->printAsOperand(O, SlotTracker);
O << " from ";
std::get<1>(Op)->printAsOperand(O);
});
O << ")";
}
}
Expand Down Expand Up @@ -3589,25 +3633,10 @@ void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif

VPBasicBlock *VPWidenPHIRecipe::getIncomingBlock(unsigned I) {
VPBasicBlock *Parent = getParent();
VPBlockBase *Pred = nullptr;
if (Parent->getNumPredecessors() > 0) {
Pred = Parent->getPredecessors()[I];
} else {
auto *Region = Parent->getParent();
assert(Region && !Region->isReplicator() && Region->getEntry() == Parent &&
"must be in the entry block of a non-replicate region");
assert(
I < 2 && getNumOperands() == 2 &&
"when placed in an entry block, only 2 incoming blocks are available");

// I == 0 selects the predecessor of the region, I == 1 selects the region
// itself whose exiting block feeds the phi across the backedge.
Pred = I == 0 ? Region->getSinglePredecessor() : Region;
}

return Pred->getExitingBasicBlock();
template <>
const VPBasicBlock *
VPPhiAccessors<VPWidenPHIRecipe>::getIncomingBlock(unsigned Idx) const {
return getIncomingBlockForRecipe(getAsRecipe(), Idx);
}

void VPWidenPHIRecipe::execute(VPTransformState &State) {
Expand Down
Loading
Loading