-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[VPlan] Manage FindLastIV start value in ComputeFindLastIVResult (NFC) #132690
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) Changes: Keep the start value as operand of ComputeFindLastIVResult. A follow-up patch will use this to make sure the start value is frozen if needed. Depends on #132689 (included in PR). Full diff: https://github.com/llvm/llvm-project/pull/132690.diff 7 Files Affected:
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 193f505fb03fe..416a0a70325d1 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -423,7 +423,7 @@ Value *createAnyOfReduction(IRBuilderBase &B, Value *Src,
/// Create a reduction of the given vector \p Src for a reduction of the
/// kind RecurKind::IFindLastIV or RecurKind::FFindLastIV. The reduction
/// operation is described by \p Desc.
-Value *createFindLastIVReduction(IRBuilderBase &B, Value *Src,
+Value *createFindLastIVReduction(IRBuilderBase &B, Value *Src, Value *Start,
const RecurrenceDescriptor &Desc);
/// Create an ordered reduction intrinsic using the given recurrence
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 2e7685254f512..f57d95e7722dc 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1233,11 +1233,11 @@ Value *llvm::createAnyOfReduction(IRBuilderBase &Builder, Value *Src,
}
Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src,
+ Value *Start,
const RecurrenceDescriptor &Desc) {
assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(
Desc.getRecurrenceKind()) &&
"Unexpected reduction kind");
- Value *StartVal = Desc.getRecurrenceStartValue();
Value *Sentinel = Desc.getSentinelValue();
Value *MaxRdx = Src->getType()->isVectorTy()
? Builder.CreateIntMaxReduce(Src, true)
@@ -1246,7 +1246,7 @@ Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src,
// reduction is sentinel value.
Value *Cmp =
Builder.CreateCmp(CmpInst::ICMP_NE, MaxRdx, Sentinel, "rdx.select.cmp");
- return Builder.CreateSelect(Cmp, MaxRdx, StartVal, "rdx.select");
+ return Builder.CreateSelect(Cmp, MaxRdx, Start, "rdx.select");
}
Value *llvm::getReductionIdentity(Intrinsic::ID RdxID, Type *Ty,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 92160a421e59c..b47b444e5cfbc 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7612,7 +7612,8 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
BasicBlock *BypassBlock) {
auto *EpiRedResult = dyn_cast<VPInstruction>(R);
if (!EpiRedResult ||
- EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult)
+ (EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult &&
+ EpiRedResult->getOpcode() != VPInstruction::ComputeFindLastIVResult))
return;
auto *EpiRedHeaderPhi =
@@ -9817,8 +9818,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
Builder.createSelect(Cond, OrigExitingVPV, PhiR, {}, "", FMFs);
OrigExitingVPV->replaceUsesWithIf(NewExitingVPV, [](VPUser &U, unsigned) {
return isa<VPInstruction>(&U) &&
- cast<VPInstruction>(&U)->getOpcode() ==
- VPInstruction::ComputeReductionResult;
+ (cast<VPInstruction>(&U)->getOpcode() ==
+ VPInstruction::ComputeReductionResult ||
+ cast<VPInstruction>(&U)->getOpcode() ==
+ VPInstruction::ComputeFindLastIVResult);
});
if (CM.usePredicatedReductionSelect(
PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy))
@@ -9861,10 +9864,19 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// bc.merge.rdx phi nodes, hence it needs to be created unconditionally here
// even for in-loop reductions, until the reduction resume value handling is
// also modeled in VPlan.
+ VPInstruction *FinalReductionResult;
VPBuilder::InsertPointGuard Guard(Builder);
Builder.setInsertPoint(MiddleVPBB, IP);
- auto *FinalReductionResult = Builder.createNaryOp(
- VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
+ if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
+ RdxDesc.getRecurrenceKind())) {
+ VPValue *Start = PhiR->getStartValue();
+ FinalReductionResult =
+ Builder.createNaryOp(VPInstruction::ComputeFindLastIVResult,
+ {PhiR, Start, NewExitingVPV}, ExitDL);
+ } else {
+ FinalReductionResult = Builder.createNaryOp(
+ VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
+ }
// Update all users outside the vector region.
OrigExitingVPV->replaceUsesWithIf(
FinalReductionResult, [FinalReductionResult](VPUser &User, unsigned) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 3059b87ae63c8..64e7f2bddb668 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -866,6 +866,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
BranchOnCount,
BranchOnCond,
Broadcast,
+ ComputeFindLastIVResult,
ComputeReductionResult,
// Takes the VPValue to extract from as first operand and the lane or part
// to extract as second operand, counting from the end starting with 1 for
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 38bec733dbf73..24a166bd336d1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -51,6 +51,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
switch (Opcode) {
case Instruction::ExtractElement:
+ case Instruction::Freeze:
return inferScalarType(R->getOperand(0));
case Instruction::Select: {
Type *ResTy = inferScalarType(R->getOperand(1));
@@ -66,6 +67,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
inferScalarType(R->getOperand(1)) &&
"different types inferred for different operands");
return IntegerType::get(Ctx, 1);
+ case VPInstruction::ComputeFindLastIVResult:
case VPInstruction::ComputeReductionResult: {
auto *PhiR = cast<VPReductionPHIRecipe>(R->getOperand(0));
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index c7190b3187d94..02ff3c5dff239 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -614,6 +614,28 @@ Value *VPInstruction::generate(VPTransformState &State) {
return Builder.CreateVectorSplat(
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
}
+ case VPInstruction::ComputeFindLastIVResult: {
+ // The recipe's operands are the reduction phi, followed by one operand for
+ // each part of the reduction.
+ unsigned UF = getNumOperands() - 2;
+ Value *ReducedPartRdx = State.get(getOperand(2));
+ for (unsigned Part = 1; Part < UF; ++Part) {
+ ReducedPartRdx = createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx,
+ State.get(getOperand(2 + Part)));
+ }
+
+ // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
+ // and will be removed by breaking up the recipe further.
+ auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
+ // Get its reduction variable descriptor.
+ const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+ RecurKind RK = RdxDesc.getRecurrenceKind();
+
+ assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK));
+ assert(!PhiR->isInLoop());
+ return createFindLastIVReduction(Builder, ReducedPartRdx,
+ State.get(getOperand(1), true), RdxDesc);
+ }
case VPInstruction::ComputeReductionResult: {
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
// and will be removed by breaking up the recipe further.
@@ -623,6 +645,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
RecurKind RK = RdxDesc.getRecurrenceKind();
+ assert(!RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) &&
+ "should be handled by ComputeFindLastIVResult");
Type *PhiTy = OrigPhi->getType();
// The recipe's operands are the reduction phi, followed by one operand for
@@ -658,9 +682,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
ReducedPartRdx = Builder.CreateBinOp(
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
- else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
- ReducedPartRdx =
- createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart);
else
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
}
@@ -669,8 +690,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
// Create the reduction after the loop. Note that inloop reductions create
// the target reduction in the loop using a Reduction recipe.
if ((State.VF.isVector() ||
- RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
- RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) &&
+ RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
!PhiR->isInLoop()) {
// TODO: Support in-order reductions based on the recurrence descriptor.
// All ops in the reduction inherit fast-math-flags from the recurrence
@@ -681,9 +701,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
ReducedPartRdx =
createAnyOfReduction(Builder, ReducedPartRdx, RdxDesc, OrigPhi);
- else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
- ReducedPartRdx =
- createFindLastIVReduction(Builder, ReducedPartRdx, RdxDesc);
else
ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK);
@@ -829,6 +846,7 @@ bool VPInstruction::isVectorToScalar() const {
return getOpcode() == VPInstruction::ExtractFromEnd ||
getOpcode() == Instruction::ExtractElement ||
getOpcode() == VPInstruction::FirstActiveLane ||
+ getOpcode() == VPInstruction::ComputeFindLastIVResult ||
getOpcode() == VPInstruction::ComputeReductionResult ||
getOpcode() == VPInstruction::AnyOf;
}
@@ -933,6 +951,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
return true;
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
+ case VPInstruction::ComputeFindLastIVResult:
+ return Op == getOperand(1);
};
llvm_unreachable("switch should return");
}
@@ -1011,6 +1031,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ExtractFromEnd:
O << "extract-from-end";
break;
+ case VPInstruction::ComputeFindLastIVResult:
+ O << "compute-find-last-iv-result";
+ break;
case VPInstruction::ComputeReductionResult:
O << "compute-reduction-result";
break;
@@ -1571,7 +1594,6 @@ void VPWidenRecipe::execute(VPTransformState &State) {
}
case Instruction::Freeze: {
Value *Op = State.get(getOperand(0));
-
Value *Freeze = Builder.CreateFreeze(Op);
State.set(this, Freeze);
break;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index a36c2aeb3da5c..a513a255344cc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -348,7 +348,9 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
// the parts to compute the final reduction value.
VPValue *Op1;
if (match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
- m_VPValue(), m_VPValue(Op1)))) {
+ m_VPValue(), m_VPValue(Op1))) ||
+ match(&R, m_VPInstruction<VPInstruction::ComputeFindLastIVResult>(
+ m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
addUniformForAllParts(cast<VPInstruction>(&R));
for (unsigned Part = 1; Part != UF; ++Part)
R.addOperand(getValueForPart(Op1, Part));
|
@llvm/pr-subscribers-vectorizers Author: Florian Hahn (fhahn) Changes: Keep the start value as operand of ComputeFindLastIVResult. A follow-up patch will use this to make sure the start value is frozen if needed. Depends on #132689 (included in PR). Full diff: https://github.com/llvm/llvm-project/pull/132690.diff 7 Files Affected:
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 193f505fb03fe..416a0a70325d1 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -423,7 +423,7 @@ Value *createAnyOfReduction(IRBuilderBase &B, Value *Src,
/// Create a reduction of the given vector \p Src for a reduction of the
/// kind RecurKind::IFindLastIV or RecurKind::FFindLastIV. The reduction
/// operation is described by \p Desc.
-Value *createFindLastIVReduction(IRBuilderBase &B, Value *Src,
+Value *createFindLastIVReduction(IRBuilderBase &B, Value *Src, Value *Start,
const RecurrenceDescriptor &Desc);
/// Create an ordered reduction intrinsic using the given recurrence
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 2e7685254f512..f57d95e7722dc 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1233,11 +1233,11 @@ Value *llvm::createAnyOfReduction(IRBuilderBase &Builder, Value *Src,
}
Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src,
+ Value *Start,
const RecurrenceDescriptor &Desc) {
assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(
Desc.getRecurrenceKind()) &&
"Unexpected reduction kind");
- Value *StartVal = Desc.getRecurrenceStartValue();
Value *Sentinel = Desc.getSentinelValue();
Value *MaxRdx = Src->getType()->isVectorTy()
? Builder.CreateIntMaxReduce(Src, true)
@@ -1246,7 +1246,7 @@ Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src,
// reduction is sentinel value.
Value *Cmp =
Builder.CreateCmp(CmpInst::ICMP_NE, MaxRdx, Sentinel, "rdx.select.cmp");
- return Builder.CreateSelect(Cmp, MaxRdx, StartVal, "rdx.select");
+ return Builder.CreateSelect(Cmp, MaxRdx, Start, "rdx.select");
}
Value *llvm::getReductionIdentity(Intrinsic::ID RdxID, Type *Ty,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 92160a421e59c..b47b444e5cfbc 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7612,7 +7612,8 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
BasicBlock *BypassBlock) {
auto *EpiRedResult = dyn_cast<VPInstruction>(R);
if (!EpiRedResult ||
- EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult)
+ (EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult &&
+ EpiRedResult->getOpcode() != VPInstruction::ComputeFindLastIVResult))
return;
auto *EpiRedHeaderPhi =
@@ -9817,8 +9818,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
Builder.createSelect(Cond, OrigExitingVPV, PhiR, {}, "", FMFs);
OrigExitingVPV->replaceUsesWithIf(NewExitingVPV, [](VPUser &U, unsigned) {
return isa<VPInstruction>(&U) &&
- cast<VPInstruction>(&U)->getOpcode() ==
- VPInstruction::ComputeReductionResult;
+ (cast<VPInstruction>(&U)->getOpcode() ==
+ VPInstruction::ComputeReductionResult ||
+ cast<VPInstruction>(&U)->getOpcode() ==
+ VPInstruction::ComputeFindLastIVResult);
});
if (CM.usePredicatedReductionSelect(
PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy))
@@ -9861,10 +9864,19 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// bc.merge.rdx phi nodes, hence it needs to be created unconditionally here
// even for in-loop reductions, until the reduction resume value handling is
// also modeled in VPlan.
+ VPInstruction *FinalReductionResult;
VPBuilder::InsertPointGuard Guard(Builder);
Builder.setInsertPoint(MiddleVPBB, IP);
- auto *FinalReductionResult = Builder.createNaryOp(
- VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
+ if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
+ RdxDesc.getRecurrenceKind())) {
+ VPValue *Start = PhiR->getStartValue();
+ FinalReductionResult =
+ Builder.createNaryOp(VPInstruction::ComputeFindLastIVResult,
+ {PhiR, Start, NewExitingVPV}, ExitDL);
+ } else {
+ FinalReductionResult = Builder.createNaryOp(
+ VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
+ }
// Update all users outside the vector region.
OrigExitingVPV->replaceUsesWithIf(
FinalReductionResult, [FinalReductionResult](VPUser &User, unsigned) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 3059b87ae63c8..64e7f2bddb668 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -866,6 +866,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
BranchOnCount,
BranchOnCond,
Broadcast,
+ ComputeFindLastIVResult,
ComputeReductionResult,
// Takes the VPValue to extract from as first operand and the lane or part
// to extract as second operand, counting from the end starting with 1 for
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 38bec733dbf73..24a166bd336d1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -51,6 +51,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
switch (Opcode) {
case Instruction::ExtractElement:
+ case Instruction::Freeze:
return inferScalarType(R->getOperand(0));
case Instruction::Select: {
Type *ResTy = inferScalarType(R->getOperand(1));
@@ -66,6 +67,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
inferScalarType(R->getOperand(1)) &&
"different types inferred for different operands");
return IntegerType::get(Ctx, 1);
+ case VPInstruction::ComputeFindLastIVResult:
case VPInstruction::ComputeReductionResult: {
auto *PhiR = cast<VPReductionPHIRecipe>(R->getOperand(0));
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index c7190b3187d94..02ff3c5dff239 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -614,6 +614,28 @@ Value *VPInstruction::generate(VPTransformState &State) {
return Builder.CreateVectorSplat(
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
}
+ case VPInstruction::ComputeFindLastIVResult: {
+ // The recipe's operands are the reduction phi, followed by one operand for
+ // each part of the reduction.
+ unsigned UF = getNumOperands() - 2;
+ Value *ReducedPartRdx = State.get(getOperand(2));
+ for (unsigned Part = 1; Part < UF; ++Part) {
+ ReducedPartRdx = createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx,
+ State.get(getOperand(2 + Part)));
+ }
+
+ // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
+ // and will be removed by breaking up the recipe further.
+ auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
+ // Get its reduction variable descriptor.
+ const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+ RecurKind RK = RdxDesc.getRecurrenceKind();
+
+ assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK));
+ assert(!PhiR->isInLoop());
+ return createFindLastIVReduction(Builder, ReducedPartRdx,
+ State.get(getOperand(1), true), RdxDesc);
+ }
case VPInstruction::ComputeReductionResult: {
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
// and will be removed by breaking up the recipe further.
@@ -623,6 +645,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
RecurKind RK = RdxDesc.getRecurrenceKind();
+ assert(!RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) &&
+ "should be handled by ComputeFindLastIVResult");
Type *PhiTy = OrigPhi->getType();
// The recipe's operands are the reduction phi, followed by one operand for
@@ -658,9 +682,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
ReducedPartRdx = Builder.CreateBinOp(
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
- else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
- ReducedPartRdx =
- createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart);
else
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
}
@@ -669,8 +690,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
// Create the reduction after the loop. Note that inloop reductions create
// the target reduction in the loop using a Reduction recipe.
if ((State.VF.isVector() ||
- RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
- RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) &&
+ RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
!PhiR->isInLoop()) {
// TODO: Support in-order reductions based on the recurrence descriptor.
// All ops in the reduction inherit fast-math-flags from the recurrence
@@ -681,9 +701,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
ReducedPartRdx =
createAnyOfReduction(Builder, ReducedPartRdx, RdxDesc, OrigPhi);
- else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
- ReducedPartRdx =
- createFindLastIVReduction(Builder, ReducedPartRdx, RdxDesc);
else
ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK);
@@ -829,6 +846,7 @@ bool VPInstruction::isVectorToScalar() const {
return getOpcode() == VPInstruction::ExtractFromEnd ||
getOpcode() == Instruction::ExtractElement ||
getOpcode() == VPInstruction::FirstActiveLane ||
+ getOpcode() == VPInstruction::ComputeFindLastIVResult ||
getOpcode() == VPInstruction::ComputeReductionResult ||
getOpcode() == VPInstruction::AnyOf;
}
@@ -933,6 +951,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
return true;
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
+ case VPInstruction::ComputeFindLastIVResult:
+ return Op == getOperand(1);
};
llvm_unreachable("switch should return");
}
@@ -1011,6 +1031,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ExtractFromEnd:
O << "extract-from-end";
break;
+ case VPInstruction::ComputeFindLastIVResult:
+ O << "compute-find-last-iv-result";
+ break;
case VPInstruction::ComputeReductionResult:
O << "compute-reduction-result";
break;
@@ -1571,7 +1594,6 @@ void VPWidenRecipe::execute(VPTransformState &State) {
}
case Instruction::Freeze: {
Value *Op = State.get(getOperand(0));
-
Value *Freeze = Builder.CreateFreeze(Op);
State.set(this, Freeze);
break;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index a36c2aeb3da5c..a513a255344cc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -348,7 +348,9 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
// the parts to compute the final reduction value.
VPValue *Op1;
if (match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
- m_VPValue(), m_VPValue(Op1)))) {
+ m_VPValue(), m_VPValue(Op1))) ||
+ match(&R, m_VPInstruction<VPInstruction::ComputeFindLastIVResult>(
+ m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
addUniformForAllParts(cast<VPInstruction>(&R));
for (unsigned Part = 1; Part != UF; ++Part)
R.addOperand(getValueForPart(Op1, Part));
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. I think AnyOf reductions will also need the same fix eventually, so maybe this could become something like ComputeSelectReductionResult.
But I don't want to block this chain of PRs, I'm happy if we just start by fixing FindLastIV reductions.
Keep the start value as operand of ComputeFindLastIVResult. A follow-up patch will use this to make sure the start value is frozen if needed.
c551166
to
da91908
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep I'll probably also introduce a separate opcode for AnyOf reductions in any case, to simplify execute, thanks
…Result (NFC) (#132690) Keep the start value as operand of ComputeFindLastIVResult. A follow-up patch will use this to make sure the start value is frozen if needed. Depends on llvm/llvm-project#132689 PR: llvm/llvm-project#132690
FindLastIV introduces multiple uses of the start value, where in the original source there was only a single use, when the epilogue is vectorized. Each use of undef may produce a different result, so introducing multiple uses can produce incorrect results when the input is undef/poison. If the start value may be undef or poison, freeze it and use the frozen value, which will be the same at all uses. See the following scenarios in Alive2: * Both main and epilogue vector loops execute, go to exit block: https://alive2.llvm.org/ce/z/_TSvRr * Both main and epilogue vector loops execute, go to scalar loop: https://alive2.llvm.org/ce/z/CsPj5v * Only epilogue vector loop executes, go to exit block: https://alive2.llvm.org/ce/z/5XqkNV * Only epilogue vector loop executes, go to scalar loop: https://alive2.llvm.org/ce/z/JUpqRN The latter 2 show requiring freezing the resume phi. That means we cannot freeze in the preheader. We could move the freeze to the main iteration count check, but that would be a bit fragile to find and other transforms can sink the freeze if needed. Depends on #132689 and #132690. Fixes #126836 PR: #132691
…2691) FindLastIV introduces multiple uses of the start value, where in the original source there was only a single use, when the epilogue is vectorized. Each use of undef may produce a different result, so introducing multiple uses can produce incorrect results when the input is undef/poison. If the start value may be undef or poison, freeze it and use the frozen value, which will be the same at all uses. See the following scenarios in Alive2: * Both main and epilogue vector loops execute, go to exit block: https://alive2.llvm.org/ce/z/_TSvRr * Both main and epilogue vector loops execute, go to scalar loop: https://alive2.llvm.org/ce/z/CsPj5v * Only epilogue vector loop executes, go to exit block: https://alive2.llvm.org/ce/z/5XqkNV * Only epilogue vector loop executes, go to scalar loop: https://alive2.llvm.org/ce/z/JUpqRN The latter 2 show requiring freezing the resume phi. That means we cannot freeze in the preheader. We could move the freeze to the main iteration count check, but that would be a bit fragile to find and other transforms can sink the freeze if needed. Depends on llvm/llvm-project#132689 and llvm/llvm-project#132690. Fixes llvm/llvm-project#126836 PR: llvm/llvm-project#132691
…) (#132690) Keep the start value as operand of ComputeFindLastIVResult. A follow-up patch will use this to make sure the start value is frozen if needed. Depends on llvm/llvm-project#132689 PR: llvm/llvm-project#132690
FindLastIV introduces multiple uses of the start value, where in the original source there was only a single use, when the epilogue is vectorized. Each use of undef may produce a different result, so introducing multiple uses can produce incorrect results when the input is undef/poison. If the start value may be undef or poison, freeze it and use the frozen value, which will be the same at all uses. See the following scenarios in Alive2: * Both main and epilogue vector loops execute, go to exit block: https://alive2.llvm.org/ce/z/_TSvRr * Both main and epilogue vector loops execute, go to scalar loop: https://alive2.llvm.org/ce/z/CsPj5v * Only epilogue vector loop executes, go to exit block: https://alive2.llvm.org/ce/z/5XqkNV * Only epilogue vector loop executes, go to scalar loop: https://alive2.llvm.org/ce/z/JUpqRN The latter 2 show requiring freezing the resume phi. That means we cannot freeze in the preheader. We could move the freeze to the main iteration count check, but that would be a bit fragile to find and other transforms can sink the freeze if needed. Depends on llvm/llvm-project#132689 and llvm/llvm-project#132690. Fixes llvm/llvm-project#126836 PR: llvm/llvm-project#132691
llvm#132690) Keep the start value as operand of ComputeFindLastIVResult. A follow-up patch will use this to make sure the start value is frozen if needed. Depends on llvm#132689 PR: llvm#132690
FindLastIV introduces multiple uses of the start value, where in the original source there was only a single use, when the epilogue is vectorized. Each use of undef may produce a different result, so introducing multiple uses can produce incorrect results when the input is undef/poison. If the start value may be undef or poison, freeze it and use the frozen value, which will be the same at all uses. See the following scenarios in Alive2: * Both main and epilogue vector loops execute, go to exit block: https://alive2.llvm.org/ce/z/_TSvRr * Both main and epilogue vector loops execute, go to scalar loop: https://alive2.llvm.org/ce/z/CsPj5v * Only epilogue vector loop executes, go to exit block: https://alive2.llvm.org/ce/z/5XqkNV * Only epilogue vector loop executes, go to scalar loop: https://alive2.llvm.org/ce/z/JUpqRN The latter 2 show requiring freezing the resume phi. That means we cannot freeze in the preheader. We could move the freeze to the main iteration count check, but that would be a bit fragile to find and other transforms can sink the freeze if needed. Depends on llvm#132689 and llvm#132690. Fixes llvm#126836 PR: llvm#132691
Add a dedicated opcode for any-of reduction, similar to llvm/llvm-project#132689 and llvm/llvm-project#132690. The patch also explicitly adds the start value to not require RecurrenceDescriptor during execute. It also allows freezing the start value to make it poison-safe. PR: llvm/llvm-project#141932
Add a dedicated opcode for any-of reduction, similar to llvm#132689 and llvm#132690. The patch also explicitly adds the start value to not require RecurrenceDescriptor during execute. It also allows freezing the start value to make it poison-safe. PR: llvm#141932
Add a dedicated opcode for any-of reduction, similar to llvm#132689 and llvm#132690. The patch also explicitly adds the start value to not require RecurrenceDescriptor during execute. It also allows freezing the start value to make it poison-safe. PR: llvm#141932
Keep the start value as operand of ComputeFindLastIVResult. A follow-up
patch will use this to make sure the start value is frozen if needed.
Depends on #132689 (included in PR)