-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[VPlan] Explicitly handle scalar pointer inductions. #83068
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 26 commits
f4dabdf
b08e892
f56e217
172dbf6
916a7d2
d2c51ec
82d74df
c6797e6
53f2937
b0a78f6
e6d2db8
5065331
f38d682
a166da5
df9cad0
ab14184
133776f
d8173fb
0bb9f5c
3a698c0
1e41111
8d05e99
6f4516f
5f4e4aa
c936a4e
a9df1d9
9f68460
74cb095
4211565
643969c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1155,6 +1155,10 @@ class VPInstruction : public VPRecipeWithIRFlags { | |
BranchOnCount, | ||
BranchOnCond, | ||
ComputeReductionResult, | ||
// Add an offset in bytes (second operand) to a base pointer (first | ||
// operand). Only generates scalar values (either for the first lane only or | ||
// for all lanes, depending on its uses). | ||
PtrAdd, | ||
}; | ||
|
||
private: | ||
|
@@ -1164,11 +1168,23 @@ class VPInstruction : public VPRecipeWithIRFlags { | |
/// An optional name that can be used for the generated IR instruction. | ||
const std::string Name; | ||
|
||
/// Utility method serving execute(): generates a single instance of the | ||
/// modeled instruction. \returns the generated value for \p Part. | ||
/// In some cases an existing value is returned rather than a generated | ||
/// Returns true if this VPInstruction generates scalar values for all lanes. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adding some context? E.g., There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks! |
||
bool doesGeneratePerAllLanes() const; | ||
|
||
/// Returns true if we can generate a scalar for the first lane only if | ||
/// needed. | ||
bool canGenerateScalarForFirstLane() const; | ||
|
||
/// Utility methods serving execute(): generates a single instance of the | ||
/// modeled instruction for a given part. \returns the generated value for \p | ||
/// Part. In some cases an existing value is returned rather than a generated | ||
/// one. | ||
Value *generateInstruction(VPTransformState &State, unsigned Part); | ||
Value *generatePerPart(VPTransformState &State, unsigned Part); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth updating documentation? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated, thanks! |
||
|
||
/// Utility methods serving execute(): generates a scalar single instance of | ||
/// the modeled instruction for a given lane. \returns the scalar generated | ||
/// value for lane \p Lane. | ||
Value *generatePerLane(VPTransformState &State, const VPIteration &Lane); | ||
|
||
#if !defined(NDEBUG) | ||
/// Return true if the VPInstruction is a floating point math operation, i.e. | ||
|
@@ -2490,12 +2506,6 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe { | |
/// for floating point inductions. | ||
const FPMathOperator *FPBinOp; | ||
|
||
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, | ||
const FPMathOperator *FPBinOp, VPValue *Start, | ||
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step) | ||
: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}), | ||
Kind(Kind), FPBinOp(FPBinOp) {} | ||
|
||
public: | ||
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, | ||
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step) | ||
|
@@ -2504,6 +2514,12 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe { | |
dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()), | ||
Start, CanonicalIV, Step) {} | ||
|
||
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, | ||
const FPMathOperator *FPBinOp, VPValue *Start, | ||
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step) | ||
: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}), | ||
Kind(Kind), FPBinOp(FPBinOp) {} | ||
|
||
~VPDerivedIVRecipe() override = default; | ||
|
||
VPRecipeBase *clone() override { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,6 +44,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { | |
CachedTypes[OtherV] = ResTy; | ||
return ResTy; | ||
} | ||
case VPInstruction::PtrAdd: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: worth asserting and caching the type of the other operand, i.e., join the above cases of ICmp and FOR Splice? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. PtrAdd's operand have different types, with the first one being a pointer and the second one being an integer offset. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, right, of course. Perhaps worth a comment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks! |
||
return inferScalarType(R->getOperand(0)); | ||
default: | ||
break; | ||
} | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -127,6 +127,7 @@ bool VPRecipeBase::mayHaveSideEffects() const { | |||||
case VPInstruction::Not: | ||||||
case VPInstruction::CalculateTripCountMinusVF: | ||||||
case VPInstruction::CanonicalIVIncrementForPart: | ||||||
case VPInstruction::PtrAdd: | ||||||
return false; | ||||||
default: | ||||||
return true; | ||||||
|
@@ -270,10 +271,47 @@ VPInstruction::VPInstruction(unsigned Opcode, | |||||
assert(isFPMathOp() && "this op can't take fast-math flags"); | ||||||
} | ||||||
|
||||||
Value *VPInstruction::generateInstruction(VPTransformState &State, | ||||||
unsigned Part) { | ||||||
bool VPInstruction::doesGeneratePerAllLanes() const { | ||||||
return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this); | ||||||
} | ||||||
|
||||||
bool VPInstruction::canGenerateScalarForFirstLane() const { | ||||||
if (Instruction::isBinaryOp(getOpcode())) | ||||||
return true; | ||||||
|
||||||
switch (Opcode) { | ||||||
case VPInstruction::BranchOnCond: | ||||||
case VPInstruction::BranchOnCount: | ||||||
case VPInstruction::CalculateTripCountMinusVF: | ||||||
case VPInstruction::CanonicalIVIncrementForPart: | ||||||
case VPInstruction::ComputeReductionResult: | ||||||
case VPInstruction::PtrAdd: | ||||||
return true; | ||||||
default: | ||||||
return false; | ||||||
} | ||||||
} | ||||||
|
||||||
Value *VPInstruction::generatePerLane(VPTransformState &State, | ||||||
const VPIteration &Lane) { | ||||||
IRBuilderBase &Builder = State.Builder; | ||||||
|
||||||
switch (getOpcode()) { | ||||||
case VPInstruction::PtrAdd: { | ||||||
auto *P = Builder.CreatePtrAdd(State.get(getOperand(0), Lane), | ||||||
State.get(getOperand(1), Lane), Name); | ||||||
return P; | ||||||
} | ||||||
default: { | ||||||
Value *Res = generatePerPart(State, Lane.Part); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, this raises an eyebrow, or two. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a consequence of |
||||||
assert(!hasResult() || !Res->getType()->isVectorTy()); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. missing error message. This assert is taken care of by the caller of generatePerPart(); could this be done consistently? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Assert is gone now. |
||||||
return Res; | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { | ||||||
IRBuilderBase &Builder = State.Builder; | ||||||
Builder.SetCurrentDebugLocation(getDebugLoc()); | ||||||
|
||||||
if (Instruction::isBinaryOp(getOpcode())) { | ||||||
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); | ||||||
|
@@ -349,7 +387,8 @@ Value *VPInstruction::generateInstruction(VPTransformState &State, | |||||
Value *Step = | ||||||
createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF); | ||||||
Value *Sub = Builder.CreateSub(ScalarTC, Step); | ||||||
Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step); | ||||||
Value *Cmp = | ||||||
Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: unrelated clang-format fix? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. undone, thanks! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. still visible, missing a commit? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should be undone now, need to be careful to avoid clang-format from undoing the change. |
||||||
Value *Zero = ConstantInt::get(ScalarTC->getType(), 0); | ||||||
return Builder.CreateSelect(Cmp, Sub, Zero); | ||||||
} | ||||||
|
@@ -511,17 +550,33 @@ void VPInstruction::execute(VPTransformState &State) { | |||||
"Recipe not a FPMathOp but has fast-math flags?"); | ||||||
if (hasFastMathFlags()) | ||||||
State.Builder.setFastMathFlags(getFastMathFlags()); | ||||||
State.Builder.SetCurrentDebugLocation(getDebugLoc()); | ||||||
bool OnlyGenerateFirstLane = | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
to be more consistent? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks! |
||||||
canGenerateScalarForFirstLane() && | ||||||
(vputils::onlyFirstLaneUsed(this) || | ||||||
getOpcode() == VPInstruction::ComputeReductionResult); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (Unrelated to this patch, while we're here: why is ComputeReductionResult an exception?) |
||||||
bool GeneratesPerAllLanes = doesGeneratePerAllLanes(); | ||||||
for (unsigned Part = 0; Part < State.UF; ++Part) { | ||||||
Value *GeneratedValue = generateInstruction(State, Part); | ||||||
if (GeneratesPerAllLanes) { | ||||||
for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue(); | ||||||
Lane != NumLanes; ++Lane) { | ||||||
Value *P = generatePerLane(State, VPIteration(Part, Lane)); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can assert P here, consistent with asserts of GeneratedValue below. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks! |
||||||
State.set(this, P, VPIteration(Part, Lane)); | ||||||
} | ||||||
continue; | ||||||
} | ||||||
|
||||||
Value *GeneratedValue = OnlyGenerateFirstLane | ||||||
? generatePerLane(State, VPIteration(Part, 0)) | ||||||
: generatePerPart(State, Part); | ||||||
if (!hasResult()) | ||||||
continue; | ||||||
assert(GeneratedValue && "generateInstruction must produce a value"); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. generatePerPart rather than generateInstruction. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed, thanks! |
||||||
|
||||||
bool IsVector = GeneratedValue->getType()->isVectorTy(); | ||||||
State.set(this, GeneratedValue, Part, !IsVector); | ||||||
assert((IsVector || getOpcode() == VPInstruction::ComputeReductionResult || | ||||||
State.VF.isScalar() || vputils::onlyFirstLaneUsed(this)) && | ||||||
"scalar value but not only first lane used"); | ||||||
State.set(this, GeneratedValue, Part, /*IsScalar*/ OnlyGenerateFirstLane); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: better to set State after the following assert. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Order flipped, thanks! |
||||||
assert((GeneratedValue->getType()->isVectorTy() == !OnlyGenerateFirstLane || | ||||||
State.VF.isScalar()) && | ||||||
"scalar value but not only first lane defined"); | ||||||
} | ||||||
} | ||||||
|
||||||
|
@@ -534,6 +589,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { | |||||
default: | ||||||
return false; | ||||||
case Instruction::ICmp: | ||||||
case VPInstruction::PtrAdd: | ||||||
// TODO: Cover additional opcodes. | ||||||
return vputils::onlyFirstLaneUsed(this); | ||||||
case VPInstruction::ActiveLaneMask: | ||||||
|
@@ -591,6 +647,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, | |||||
case VPInstruction::ComputeReductionResult: | ||||||
O << "compute-reduction-result"; | ||||||
break; | ||||||
case VPInstruction::PtrAdd: | ||||||
O << "ptradd"; | ||||||
break; | ||||||
default: | ||||||
O << Instruction::getOpcodeName(getOpcode()); | ||||||
} | ||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -499,15 +499,18 @@ static void removeDeadRecipes(VPlan &Plan) { | |||||
} | ||||||
} | ||||||
|
||||||
static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID, | ||||||
static VPValue *createScalarIVSteps(VPlan &Plan, | ||||||
InductionDescriptor::InductionKind Kind, | ||||||
Instruction::BinaryOps InductionOpcode, | ||||||
FPMathOperator *FPBinOp, | ||||||
ScalarEvolution &SE, Instruction *TruncI, | ||||||
VPValue *StartV, VPValue *Step, | ||||||
VPBasicBlock::iterator IP) { | ||||||
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); | ||||||
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); | ||||||
VPSingleDefRecipe *BaseIV = CanonicalIV; | ||||||
if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step)) { | ||||||
BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step); | ||||||
if (!CanonicalIV->isCanonical(Kind, StartV, Step)) { | ||||||
BaseIV = new VPDerivedIVRecipe(Kind, FPBinOp, StartV, CanonicalIV, Step); | ||||||
HeaderVPBB->insert(BaseIV, IP); | ||||||
} | ||||||
|
||||||
|
@@ -537,21 +540,54 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID, | |||||
VecPreheader->appendRecipe(Step->getDefiningRecipe()); | ||||||
} | ||||||
|
||||||
VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step); | ||||||
VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe( | ||||||
BaseIV, Step, InductionOpcode, | ||||||
FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags()); | ||||||
HeaderVPBB->insert(Steps, IP); | ||||||
return Steps; | ||||||
} | ||||||
|
||||||
/// If any user of a VPWidenIntOrFpInductionRecipe needs scalar values, | ||||||
/// provide them by building scalar steps off of the canonical scalar IV and | ||||||
/// Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd | ||||||
/// (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as | ||||||
/// VPWidenPointerInductionRecipe cannot generate scalars. Also optimize | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
So if scalar are used along with vectors of widen pointer induction, the former will be extracted from the latter; as opposed to widening int or fp inductions, where scalar users are fed scalar steps even if there are also vector users. Right? This preserves current behavior, but worth noting the discrepancy, potentially removing it later. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tried to clarify, thanks! |
||||||
/// VPWidenIntOrFpInductionRecipe, if any of its users needs scalar values, by | ||||||
/// providing them by building scalar steps off of the canonical scalar IV and | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adjusted, thanks! |
||||||
/// update the original IV's users. This is an optional optimization to reduce | ||||||
/// the needs of vector extracts. | ||||||
static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE) { | ||||||
static void legalizeAndOptimizeInductions(VPlan &Plan, ScalarEvolution &SE) { | ||||||
SmallVector<VPRecipeBase *> ToRemove; | ||||||
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); | ||||||
bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1)); | ||||||
VPBasicBlock::iterator InsertPt = HeaderVPBB->getFirstNonPhi(); | ||||||
for (VPRecipeBase &Phi : HeaderVPBB->phis()) { | ||||||
// Replace wide pointer inductions which have only their scalars used by | ||||||
// PtrAdd(IndStart, ScalarIVSteps (0, Step)). | ||||||
if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) { | ||||||
if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF())) | ||||||
continue; | ||||||
|
||||||
const InductionDescriptor &ID = PtrIV->getInductionDescriptor(); | ||||||
VPValue *StartV = Plan.getVPValueOrAddLiveIn( | ||||||
ConstantInt::get(ID.getStep()->getType(), 0)); | ||||||
VPValue *StepV = PtrIV->getOperand(1); | ||||||
VPRecipeBase *Steps = | ||||||
createScalarIVSteps(Plan, InductionDescriptor::IK_IntInduction, | ||||||
Instruction::Add, nullptr, SE, nullptr, StartV, | ||||||
StepV, InsertPt) | ||||||
->getDefiningRecipe(); | ||||||
|
||||||
auto *Recipe = | ||||||
new VPInstruction(VPInstruction::PtrAdd, | ||||||
{PtrIV->getStartValue(), Steps->getVPSingleValue()}, | ||||||
PtrIV->getDebugLoc(), "next.gep"); | ||||||
|
||||||
Recipe->insertAfter(Steps); | ||||||
PtrIV->replaceAllUsesWith(Recipe); | ||||||
continue; | ||||||
} | ||||||
|
||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps worth moving here some of the documentation above that described what happens next. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a comment, thanks! |
||||||
// Replace widened induction with scalar steps for users that only use | ||||||
// scalars. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would be good if these two cases that createScalarIVSteps for scalar users only, would share something. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think try to share |
||||||
auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); | ||||||
if (!WideIV) | ||||||
continue; | ||||||
|
@@ -561,9 +597,11 @@ static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE) { | |||||
continue; | ||||||
|
||||||
const InductionDescriptor &ID = WideIV->getInductionDescriptor(); | ||||||
VPValue *Steps = createScalarIVSteps(Plan, ID, SE, WideIV->getTruncInst(), | ||||||
WideIV->getStartValue(), | ||||||
WideIV->getStepValue(), InsertPt); | ||||||
VPValue *Steps = createScalarIVSteps( | ||||||
Plan, ID.getKind(), ID.getInductionOpcode(), | ||||||
dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()), SE, | ||||||
WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(), | ||||||
InsertPt); | ||||||
|
||||||
// Update scalar users of IV to use Step instead. | ||||||
if (!HasOnlyVectorVFs) | ||||||
|
@@ -1026,7 +1064,7 @@ void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) { | |||||
removeRedundantInductionCasts(Plan); | ||||||
|
||||||
simplifyRecipes(Plan, SE.getContext()); | ||||||
optimizeInductions(Plan, SE); | ||||||
legalizeAndOptimizeInductions(Plan, SE); | ||||||
removeDeadRecipes(Plan); | ||||||
|
||||||
createAndOptimizeReplicateRegions(Plan); | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Worth documenting somewhere what this VPInstruction/Opcode represents, including being scalar.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added, thanks!