Skip to content

[VPlan] Only store RecurKind + FastMathFlags in VPReductionRecipe. NFCI #131300

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9772,8 +9772,12 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (CM.blockNeedsPredicationForAnyReason(BB))
CondOp = RecipeBuilder.getBlockInMask(BB);

// Non-FP RdxDescs will have all fast math flags set, so clear them.
FastMathFlags FMFs = isa<FPMathOperator>(CurrentLinkI)
? RdxDesc.getFastMathFlags()
: FastMathFlags();
auto *RedRecipe = new VPReductionRecipe(
RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp,
Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
CM.useOrderedReductions(RdxDesc), CurrentLinkI->getDebugLoc());
// Append the recipe to the end of the VPBasicBlock because we need to
// ensure that it comes after all of its inputs, including CondOp.
Expand Down
38 changes: 17 additions & 21 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -2239,22 +2239,19 @@ class VPInterleaveRecipe : public VPRecipeBase {
/// a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
class VPReductionRecipe : public VPRecipeWithIRFlags {
/// The recurrence descriptor for the reduction in question.
const RecurrenceDescriptor &RdxDesc;
/// The recurrence kind for the reduction in question.
RecurKind RdxKind;
bool IsOrdered;
/// Whether the reduction is conditional.
bool IsConditional = false;

protected:
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
Instruction *I, ArrayRef<VPValue *> Operands,
VPValue *CondOp, bool IsOrdered, DebugLoc DL)
: VPRecipeWithIRFlags(SC, Operands,
isa_and_nonnull<FPMathOperator>(I)
? R.getFastMathFlags()
: FastMathFlags(),
DL),
RdxDesc(R), IsOrdered(IsOrdered) {
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
FastMathFlags FMFs, Instruction *I,
ArrayRef<VPValue *> Operands, VPValue *CondOp,
bool IsOrdered, DebugLoc DL)
: VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
IsOrdered(IsOrdered) {
if (CondOp) {
IsConditional = true;
addOperand(CondOp);
Expand All @@ -2263,19 +2260,19 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
}

public:
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
bool IsOrdered, DebugLoc DL = {})
: VPReductionRecipe(VPDef::VPReductionSC, R, I,
: VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
IsOrdered, DL) {}

~VPReductionRecipe() override = default;

VPReductionRecipe *clone() override {
return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
getVecOp(), getCondOp(), IsOrdered,
getDebugLoc());
return new VPReductionRecipe(RdxKind, getFastMathFlags(),
getUnderlyingInstr(), getChainOp(), getVecOp(),
getCondOp(), IsOrdered, getDebugLoc());
}

static inline bool classof(const VPRecipeBase *R) {
Expand All @@ -2301,10 +2298,8 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
VPSlotTracker &SlotTracker) const override;
#endif

/// Return the recurrence descriptor for the in-loop reduction.
const RecurrenceDescriptor &getRecurrenceDescriptor() const {
return RdxDesc;
}
/// Return the recurrence kind for the in-loop reduction.
RecurKind getRecurrenceKind() const { return RdxKind; }
/// Return true if the in-loop reduction is ordered.
bool isOrdered() const { return IsOrdered; };
/// Return true if the in-loop reduction is conditional.
Expand All @@ -2328,7 +2323,8 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp,
DebugLoc DL = {})
: VPReductionRecipe(
VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
VPDef::VPReductionEVLSC, R.getRecurrenceKind(),
R.getFastMathFlags(),
cast_or_null<Instruction>(R.getUnderlyingValue()),
ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
R.isOrdered(), DL) {}
Expand Down
46 changes: 21 additions & 25 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2300,7 +2300,7 @@ void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
void VPReductionRecipe::execute(VPTransformState &State) {
assert(!State.Lane && "Reduction being replicated.");
Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
RecurKind Kind = RdxDesc.getRecurrenceKind();
RecurKind Kind = getRecurrenceKind();
assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
"In-loop AnyOf reductions aren't currently supported");
// Propagate the fast-math flags carried by the underlying instruction.
Expand All @@ -2313,8 +2313,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();

Value *Start =
getRecurrenceIdentity(Kind, ElementTy, RdxDesc.getFastMathFlags());
Value *Start = getRecurrenceIdentity(Kind, ElementTy, getFastMathFlags());
if (State.VF.isVector())
Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);

Expand All @@ -2329,18 +2328,19 @@ void VPReductionRecipe::execute(VPTransformState &State) {
createOrderedReduction(State.Builder, Kind, NewVecOp, PrevInChain);
else
NewRed = State.Builder.CreateBinOp(
(Instruction::BinaryOps)RdxDesc.getOpcode(), PrevInChain, NewVecOp);
(Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(Kind),
PrevInChain, NewVecOp);
PrevInChain = NewRed;
NextInChain = NewRed;
} else {
PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
NewRed = createSimpleReduction(State.Builder, NewVecOp, Kind);
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
NewRed, PrevInChain);
NextInChain = createMinMaxOp(State.Builder, Kind, NewRed, PrevInChain);
else
NextInChain = State.Builder.CreateBinOp(
(Instruction::BinaryOps)RdxDesc.getOpcode(), NewRed, PrevInChain);
(Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(Kind), NewRed,
PrevInChain);
}
State.set(this, NextInChain, /*IsScalar*/ true);
}
Expand All @@ -2351,10 +2351,9 @@ void VPReductionEVLRecipe::execute(VPTransformState &State) {
auto &Builder = State.Builder;
// Propagate the fast-math flags carried by the underlying instruction.
IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor();
Builder.setFastMathFlags(getFastMathFlags());

RecurKind Kind = RdxDesc.getRecurrenceKind();
RecurKind Kind = getRecurrenceKind();
Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
Value *VecOp = State.get(getVecOp());
Value *EVL = State.get(getEVL(), VPLane(0));
Expand All @@ -2377,18 +2376,19 @@ void VPReductionEVLRecipe::execute(VPTransformState &State) {
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev);
else
NewRed = Builder.CreateBinOp((Instruction::BinaryOps)RdxDesc.getOpcode(),
NewRed, Prev);
NewRed = Builder.CreateBinOp(
(Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(Kind), NewRed,
Prev);
}
State.set(this, NewRed, /*IsScalar*/ true);
}

InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
RecurKind RdxKind = RdxDesc.getRecurrenceKind();
RecurKind RdxKind = getRecurrenceKind();
Type *ElementTy = Ctx.Types.inferScalarType(this);
auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
unsigned Opcode = RdxDesc.getOpcode();
unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind);
FastMathFlags FMFs = getFastMathFlags();

// TODO: Support any-of and in-loop reductions.
Expand All @@ -2401,9 +2401,6 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
ForceTargetInstructionCost.getNumOccurrences() > 0) &&
"In-loop reduction not implemented in VPlan-based cost model currently.");

assert(ElementTy->getTypeID() == RdxDesc.getRecurrenceType()->getTypeID() &&
"Inferred type and recurrence type mismatch.");

// Cost = Reduction cost + BinOp cost
InstructionCost Cost =
Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, Ctx.CostKind);
Expand All @@ -2426,28 +2423,30 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
getChainOp()->printAsOperand(O, SlotTracker);
O << " +";
printFlags(O);
O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
O << " reduce."
<< Instruction::getOpcodeName(
RecurrenceDescriptor::getOpcode(getRecurrenceKind()))
<< " (";
getVecOp()->printAsOperand(O, SlotTracker);
if (isConditional()) {
O << ", ";
getCondOp()->printAsOperand(O, SlotTracker);
}
O << ")";
if (RdxDesc.IntermediateStore)
O << " (with final reduction value stored in invariant address sank "
"outside of loop)";
Comment on lines -2436 to -2438
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK to drop this, as the store is sunk explicitly.

}

void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor();
O << Indent << "REDUCE ";
printAsOperand(O, SlotTracker);
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
O << " +";
printFlags(O);
O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
O << " vp.reduce."
<< Instruction::getOpcodeName(
RecurrenceDescriptor::getOpcode(getRecurrenceKind()))
<< " (";
getVecOp()->printAsOperand(O, SlotTracker);
O << ", ";
getEVL()->printAsOperand(O, SlotTracker);
Expand All @@ -2456,9 +2455,6 @@ void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent,
getCondOp()->printAsOperand(O, SlotTracker);
}
O << ")";
if (RdxDesc.IntermediateStore)
O << " (with final reduction value stored in invariant address sank "
"outside of loop)";
}
#endif

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopVectorize/vplan-printing.ll
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[IV]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop)
; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>)
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
Expand Down
16 changes: 8 additions & 8 deletions llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1170,8 +1170,8 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
VPValue *ChainOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3));
VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp,
VecOp, false);
VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), Add, ChainOp,
CondOp, VecOp, false);
EXPECT_FALSE(Recipe.mayHaveSideEffects());
EXPECT_FALSE(Recipe.mayReadFromMemory());
EXPECT_FALSE(Recipe.mayWriteToMemory());
Expand All @@ -1185,8 +1185,8 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
VPValue *ChainOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3));
VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp,
VecOp, false);
VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), Add, ChainOp,
CondOp, VecOp, false);
VPValue *EVL = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 4));
VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp);
EXPECT_FALSE(EVLRecipe.mayHaveSideEffects());
Expand Down Expand Up @@ -1540,8 +1540,8 @@ TEST_F(VPRecipeTest, CastVPReductionRecipeToVPUser) {
VPValue *ChainOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3));
VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp, VecOp,
false);
VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), Add, ChainOp,
CondOp, VecOp, false);
EXPECT_TRUE(isa<VPUser>(&Recipe));
VPRecipeBase *BaseR = &Recipe;
EXPECT_TRUE(isa<VPUser>(BaseR));
Expand All @@ -1555,8 +1555,8 @@ TEST_F(VPRecipeTest, CastVPReductionEVLRecipeToVPUser) {
VPValue *ChainOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3));
VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp, VecOp,
false);
VPReductionRecipe Recipe(RecurKind::Add, FastMathFlags(), Add, ChainOp,
CondOp, VecOp, false);
VPValue *EVL = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 0));
VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp);
EXPECT_TRUE(isa<VPUser>(&EVLRecipe));
Expand Down