-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[VPlan] Introduce ComputeReductionResult VPInstruction opcode. #70253
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,6 +28,7 @@ | |
#include "llvm/Support/Debug.h" | ||
#include "llvm/Support/raw_ostream.h" | ||
#include "llvm/Transforms/Utils/BasicBlockUtils.h" | ||
#include "llvm/Transforms/Utils/LoopUtils.h" | ||
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" | ||
#include <cassert> | ||
|
||
|
@@ -401,6 +402,84 @@ Value *VPInstruction::generateInstruction(VPTransformState &State, | |
Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); | ||
return CondBr; | ||
} | ||
case VPInstruction::ComputeReductionResult: { | ||
if (Part != 0) | ||
return State.get(this, 0); | ||
|
||
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary | ||
// and will be removed by breaking up the recipe further. | ||
auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. LoopExitingDef should arguably be the only Operand of ComputeReductionResult, as it (only) truly feeds it with its values. PhiR is "used" only to access information it holds, and could potentially be retrieved as a user of LoopExitingDef(?) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, but should this also be addressed directly here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Current option is ok, temporarily. Alternatives listed have their disadvantages. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good, thanks! |
||
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue()); | ||
// Get its reduction variable descriptor. | ||
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suggesting a ComputeReductionResult recipe could hold its RdxDesc. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would be an option, see my previous comment. There are multiple properties needed from PhiR here as well There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. -- which exacerbates the undesired issue of implicit cross-recipe dependences. Please add a FIXME to emphasize that this is an intermediate situation to support gradual refactoring, possibly also clarifying at the summary that, rather than There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a FIXME, thanks! |
||
|
||
RecurKind RK = RdxDesc.getRecurrenceKind(); | ||
|
||
State.setDebugLocFrom(getDebugLoc()); | ||
|
||
VPValue *LoopExitingDef = getOperand(1); | ||
Type *PhiTy = OrigPhi->getType(); | ||
VectorParts RdxParts(State.UF); | ||
for (unsigned Part = 0; Part < State.UF; ++Part) | ||
RdxParts[Part] = State.get(LoopExitingDef, Part); | ||
|
||
// If the vector reduction can be performed in a smaller type, we truncate | ||
// then extend the loop exit value to enable InstCombine to evaluate the | ||
// entire expression in the smaller type. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unrelated to this patch: better handle type size reductions as part of truncateToMinBW, along with all other type shrinkings, rather than here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed, added a TODO |
||
// TODO: Handle this in truncateToMinBW. | ||
if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) { | ||
Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF); | ||
for (unsigned Part = 0; Part < State.UF; ++Part) | ||
RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); | ||
} | ||
// Reduce all of the unrolled parts into a single vector. | ||
Value *ReducedPartRdx = RdxParts[0]; | ||
unsigned Op = RecurrenceDescriptor::getOpcode(RK); | ||
|
||
if (PhiR->isOrdered()) { | ||
ReducedPartRdx = RdxParts[State.UF - 1]; | ||
} else { | ||
// Floating-point operations should have some FMF to enable the reduction. | ||
IRBuilderBase::FastMathFlagGuard FMFG(Builder); | ||
Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); | ||
for (unsigned Part = 1; Part < State.UF; ++Part) { | ||
Value *RdxPart = RdxParts[Part]; | ||
if (Op != Instruction::ICmp && Op != Instruction::FCmp) | ||
ReducedPartRdx = Builder.CreateBinOp( | ||
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx"); | ||
else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) { | ||
TrackingVH<Value> ReductionStartValue = | ||
RdxDesc.getRecurrenceStartValue(); | ||
ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK, | ||
ReducedPartRdx, RdxPart); | ||
} else | ||
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart); | ||
} | ||
} | ||
|
||
// Create the reduction after the loop. Note that inloop reductions create | ||
// the target reduction in the loop using a Reduction recipe. | ||
if (State.VF.isVector() && !PhiR->isInLoop()) { | ||
ReducedPartRdx = | ||
createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi); | ||
// If the reduction can be performed in a smaller type, we need to extend | ||
// the reduction to the wider type before we branch to the original loop. | ||
if (PhiTy != RdxDesc.getRecurrenceType()) | ||
ReducedPartRdx = RdxDesc.isSigned() | ||
? Builder.CreateSExt(ReducedPartRdx, PhiTy) | ||
: Builder.CreateZExt(ReducedPartRdx, PhiTy); | ||
} | ||
|
||
// If there were stores of the reduction value to a uniform memory address | ||
// inside the loop, create the final store here. | ||
if (StoreInst *SI = RdxDesc.IntermediateStore) { | ||
auto *NewSI = Builder.CreateAlignedStore( | ||
ReducedPartRdx, SI->getPointerOperand(), SI->getAlign()); | ||
propagateMetadata(NewSI, SI); | ||
} | ||
|
||
return ReducedPartRdx; | ||
} | ||
default: | ||
llvm_unreachable("Unsupported opcode for instruction"); | ||
} | ||
|
@@ -477,6 +556,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, | |
case VPInstruction::BranchOnCount: | ||
O << "branch-on-count"; | ||
break; | ||
case VPInstruction::ComputeReductionResult: | ||
O << "compute-reduction-result"; | ||
break; | ||
default: | ||
O << Instruction::getOpcodeName(getOpcode()); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: above "Generate the IR code for the body of the vectorized loop" is no longer accurate. Suggest to update with something like "Generate the IR code for the vectorized loop captured in VPlan"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adjusted, thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still awaits adjustment?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should be adjusted now, thanks!