-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[VPlan] Explicitly handle scalar pointer inductions. #83068
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f4dabdf
b08e892
f56e217
172dbf6
916a7d2
d2c51ec
82d74df
c6797e6
53f2937
b0a78f6
e6d2db8
5065331
f38d682
a166da5
df9cad0
ab14184
133776f
d8173fb
0bb9f5c
3a698c0
1e41111
8d05e99
6f4516f
5f4e4aa
c936a4e
a9df1d9
9f68460
74cb095
4211565
643969c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1155,6 +1155,10 @@ class VPInstruction : public VPRecipeWithIRFlags { | |
BranchOnCount, | ||
BranchOnCond, | ||
ComputeReductionResult, | ||
// Add an offset in bytes (second operand) to a base pointer (first | ||
// operand). Only generates scalar values (either for the first lane only or | ||
// for all lanes, depending on its uses). | ||
PtrAdd, | ||
}; | ||
|
||
private: | ||
|
@@ -1164,11 +1168,28 @@ class VPInstruction : public VPRecipeWithIRFlags { | |
/// An optional name that can be used for the generated IR instruction. | ||
const std::string Name; | ||
|
||
/// Utility method serving execute(): generates a single instance of the | ||
/// modeled instruction. \returns the generated value for \p Part. | ||
/// In some cases an existing value is returned rather than a generated | ||
/// Returns true if this VPInstruction generates scalar values for all lanes. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adding some context? E.g., There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks! |
||
/// Most VPInstructions generate a single value per part, either vector or | ||
/// scalar. VPReplicateRecipe takes care of generating multiple (scalar) | ||
/// values per all lanes, stemming from an original ingredient. This method | ||
/// identifies the (rare) cases of VPInstructions that do so as well, w/o an | ||
/// underlying ingredient. | ||
bool doesGeneratePerAllLanes() const; | ||
|
||
/// Returns true if we can generate a scalar for the first lane only if | ||
/// needed. | ||
bool canGenerateScalarForFirstLane() const; | ||
|
||
/// Utility methods serving execute(): generates a single instance of the | ||
/// modeled instruction for a given part. \returns the generated value for \p | ||
/// Part. In some cases an existing value is returned rather than a generated | ||
/// one. | ||
Value *generateInstruction(VPTransformState &State, unsigned Part); | ||
Value *generatePerPart(VPTransformState &State, unsigned Part); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth updating documentation? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated, thanks! |
||
|
||
/// Utility methods serving execute(): generates a scalar single instance of | ||
/// the modeled instruction for a given lane. \returns the scalar generated | ||
/// value for lane \p Lane. | ||
Value *generatePerLane(VPTransformState &State, const VPIteration &Lane); | ||
|
||
#if !defined(NDEBUG) | ||
/// Return true if the VPInstruction is a floating point math operation, i.e. | ||
|
@@ -2491,12 +2512,6 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe { | |
/// for floating point inductions. | ||
const FPMathOperator *FPBinOp; | ||
|
||
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, | ||
const FPMathOperator *FPBinOp, VPValue *Start, | ||
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step) | ||
: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}), | ||
Kind(Kind), FPBinOp(FPBinOp) {} | ||
|
||
public: | ||
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, | ||
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step) | ||
|
@@ -2505,6 +2520,12 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe { | |
dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()), | ||
Start, CanonicalIV, Step) {} | ||
|
||
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, | ||
const FPMathOperator *FPBinOp, VPValue *Start, | ||
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step) | ||
: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}), | ||
Kind(Kind), FPBinOp(FPBinOp) {} | ||
|
||
~VPDerivedIVRecipe() override = default; | ||
|
||
VPRecipeBase *clone() override { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,6 +44,9 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { | |
CachedTypes[OtherV] = ResTy; | ||
return ResTy; | ||
} | ||
case VPInstruction::PtrAdd: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: worth asserting and caching the type of the other operand, i.e., join the above cases of ICmp and FOR Splice? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. PtrAdd's operand have different types, with the first one being a pointer and the second one being an integer offset. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, right, of course. Perhaps worth a comment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks! |
||
// Return the type based on the pointer argument (i.e. first operand). | ||
return inferScalarType(R->getOperand(0)); | ||
default: | ||
break; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -127,6 +127,7 @@ bool VPRecipeBase::mayHaveSideEffects() const { | |
case VPInstruction::Not: | ||
case VPInstruction::CalculateTripCountMinusVF: | ||
case VPInstruction::CanonicalIVIncrementForPart: | ||
case VPInstruction::PtrAdd: | ||
return false; | ||
default: | ||
return true; | ||
|
@@ -270,10 +271,39 @@ VPInstruction::VPInstruction(unsigned Opcode, | |
assert(isFPMathOp() && "this op can't take fast-math flags"); | ||
} | ||
|
||
Value *VPInstruction::generateInstruction(VPTransformState &State, | ||
unsigned Part) { | ||
bool VPInstruction::doesGeneratePerAllLanes() const { | ||
return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this); | ||
} | ||
|
||
bool VPInstruction::canGenerateScalarForFirstLane() const { | ||
if (Instruction::isBinaryOp(getOpcode())) | ||
return true; | ||
|
||
switch (Opcode) { | ||
case VPInstruction::BranchOnCond: | ||
case VPInstruction::BranchOnCount: | ||
case VPInstruction::CalculateTripCountMinusVF: | ||
case VPInstruction::CanonicalIVIncrementForPart: | ||
case VPInstruction::ComputeReductionResult: | ||
case VPInstruction::PtrAdd: | ||
return true; | ||
default: | ||
return false; | ||
} | ||
} | ||
|
||
Value *VPInstruction::generatePerLane(VPTransformState &State, | ||
const VPIteration &Lane) { | ||
IRBuilderBase &Builder = State.Builder; | ||
|
||
assert(getOpcode() == VPInstruction::PtrAdd && | ||
"only PtrAdd opcodes are supported for now"); | ||
return Builder.CreatePtrAdd(State.get(getOperand(0), Lane), | ||
State.get(getOperand(1), Lane), Name); | ||
} | ||
|
||
Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { | ||
IRBuilderBase &Builder = State.Builder; | ||
Builder.SetCurrentDebugLocation(getDebugLoc()); | ||
|
||
if (Instruction::isBinaryOp(getOpcode())) { | ||
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); | ||
|
@@ -490,6 +520,13 @@ Value *VPInstruction::generateInstruction(VPTransformState &State, | |
|
||
return ReducedPartRdx; | ||
} | ||
case VPInstruction::PtrAdd: { | ||
assert(vputils::onlyFirstLaneUsed(this) && | ||
"can only generate first lane for PtrAdd"); | ||
Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true); | ||
Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true); | ||
return Builder.CreatePtrAdd(Ptr, Addend, Name); | ||
} | ||
default: | ||
llvm_unreachable("Unsupported opcode for instruction"); | ||
} | ||
|
@@ -514,17 +551,33 @@ void VPInstruction::execute(VPTransformState &State) { | |
"Recipe not a FPMathOp but has fast-math flags?"); | ||
if (hasFastMathFlags()) | ||
State.Builder.setFastMathFlags(getFastMathFlags()); | ||
State.Builder.SetCurrentDebugLocation(getDebugLoc()); | ||
bool GeneratesPerFirstLaneOnly = | ||
canGenerateScalarForFirstLane() && | ||
(vputils::onlyFirstLaneUsed(this) || | ||
getOpcode() == VPInstruction::ComputeReductionResult); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (Unrelated to this patch, while we're here: why is ComputeReductionResult an exception?) |
||
bool GeneratesPerAllLanes = doesGeneratePerAllLanes(); | ||
for (unsigned Part = 0; Part < State.UF; ++Part) { | ||
Value *GeneratedValue = generateInstruction(State, Part); | ||
if (!hasResult()) | ||
if (GeneratesPerAllLanes) { | ||
for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue(); | ||
Lane != NumLanes; ++Lane) { | ||
Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane)); | ||
assert(GeneratedValue && "generatePerLane must produce a value"); | ||
State.set(this, GeneratedValue, VPIteration(Part, Lane)); | ||
} | ||
continue; | ||
assert(GeneratedValue && "generateInstruction must produce a value"); | ||
} | ||
|
||
bool IsVector = GeneratedValue->getType()->isVectorTy(); | ||
State.set(this, GeneratedValue, Part, !IsVector); | ||
assert((IsVector || getOpcode() == VPInstruction::ComputeReductionResult || | ||
State.VF.isScalar() || vputils::onlyFirstLaneUsed(this)) && | ||
"scalar value but not only first lane used"); | ||
Value *GeneratedValue = generatePerPart(State, Part); | ||
if (!hasResult()) | ||
continue; | ||
assert(GeneratedValue && "generatePerPart must produce a value"); | ||
assert((GeneratedValue->getType()->isVectorTy() == | ||
!GeneratesPerFirstLaneOnly || | ||
State.VF.isScalar()) && | ||
"scalar value but not only first lane defined"); | ||
State.set(this, GeneratedValue, Part, | ||
/*IsScalar*/ GeneratesPerFirstLaneOnly); | ||
} | ||
} | ||
|
||
|
@@ -537,6 +590,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { | |
default: | ||
return false; | ||
case Instruction::ICmp: | ||
case VPInstruction::PtrAdd: | ||
// TODO: Cover additional opcodes. | ||
return vputils::onlyFirstLaneUsed(this); | ||
case VPInstruction::ActiveLaneMask: | ||
|
@@ -594,6 +648,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, | |
case VPInstruction::ComputeReductionResult: | ||
O << "compute-reduction-result"; | ||
break; | ||
case VPInstruction::PtrAdd: | ||
O << "ptradd"; | ||
break; | ||
default: | ||
O << Instruction::getOpcodeName(getOpcode()); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -498,15 +498,18 @@ static void removeDeadRecipes(VPlan &Plan) { | |
} | ||
} | ||
|
||
static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID, | ||
static VPValue *createScalarIVSteps(VPlan &Plan, | ||
InductionDescriptor::InductionKind Kind, | ||
Instruction::BinaryOps InductionOpcode, | ||
FPMathOperator *FPBinOp, | ||
ScalarEvolution &SE, Instruction *TruncI, | ||
VPValue *StartV, VPValue *Step, | ||
VPBasicBlock::iterator IP) { | ||
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); | ||
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); | ||
VPSingleDefRecipe *BaseIV = CanonicalIV; | ||
if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step)) { | ||
BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step); | ||
if (!CanonicalIV->isCanonical(Kind, StartV, Step)) { | ||
BaseIV = new VPDerivedIVRecipe(Kind, FPBinOp, StartV, CanonicalIV, Step); | ||
HeaderVPBB->insert(BaseIV, IP); | ||
} | ||
|
||
|
@@ -536,21 +539,56 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID, | |
VecPreheader->appendRecipe(Step->getDefiningRecipe()); | ||
} | ||
|
||
VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step); | ||
VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe( | ||
BaseIV, Step, InductionOpcode, | ||
FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags()); | ||
HeaderVPBB->insert(Steps, IP); | ||
return Steps; | ||
} | ||
|
||
/// If any user of a VPWidenIntOrFpInductionRecipe needs scalar values, | ||
/// provide them by building scalar steps off of the canonical scalar IV and | ||
/// update the original IV's users. This is an optional optimization to reduce | ||
/// the needs of vector extracts. | ||
static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE) { | ||
/// Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd | ||
/// (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as | ||
/// VPWidenPointerInductionRecipe will generate vectors only. If some users | ||
/// require vectors while other require scalars, the scalar uses need to extract | ||
/// the scalars from the generated vectors (Note that this is different to how | ||
/// int/fp inductions are handled). Also optimize VPWidenIntOrFpInductionRecipe, | ||
/// if any of its users needs scalar values, by providing them scalar steps | ||
/// built on the canonical scalar IV and update the original IV's users. This is | ||
/// an optional optimization to reduce the needs of vector extracts. | ||
static void legalizeAndOptimizeInductions(VPlan &Plan, ScalarEvolution &SE) { | ||
SmallVector<VPRecipeBase *> ToRemove; | ||
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); | ||
bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1)); | ||
VPBasicBlock::iterator InsertPt = HeaderVPBB->getFirstNonPhi(); | ||
for (VPRecipeBase &Phi : HeaderVPBB->phis()) { | ||
// Replace wide pointer inductions which have only their scalars used by | ||
// PtrAdd(IndStart, ScalarIVSteps (0, Step)). | ||
if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) { | ||
if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF())) | ||
continue; | ||
|
||
const InductionDescriptor &ID = PtrIV->getInductionDescriptor(); | ||
VPValue *StartV = Plan.getVPValueOrAddLiveIn( | ||
ConstantInt::get(ID.getStep()->getType(), 0)); | ||
VPValue *StepV = PtrIV->getOperand(1); | ||
VPRecipeBase *Steps = | ||
createScalarIVSteps(Plan, InductionDescriptor::IK_IntInduction, | ||
Instruction::Add, nullptr, SE, nullptr, StartV, | ||
StepV, InsertPt) | ||
->getDefiningRecipe(); | ||
|
||
auto *Recipe = | ||
new VPInstruction(VPInstruction::PtrAdd, | ||
{PtrIV->getStartValue(), Steps->getVPSingleValue()}, | ||
PtrIV->getDebugLoc(), "next.gep"); | ||
|
||
Recipe->insertAfter(Steps); | ||
PtrIV->replaceAllUsesWith(Recipe); | ||
continue; | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps worth moving here some of the documentation above that described what happens next. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a comment, thanks! |
||
// Replace widened induction with scalar steps for users that only use | ||
// scalars. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would be good if these two cases that createScalarIVSteps for scalar users only, would share something. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think try to share |
||
auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); | ||
if (!WideIV) | ||
continue; | ||
|
@@ -560,9 +598,11 @@ static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE) { | |
continue; | ||
|
||
const InductionDescriptor &ID = WideIV->getInductionDescriptor(); | ||
VPValue *Steps = createScalarIVSteps(Plan, ID, SE, WideIV->getTruncInst(), | ||
WideIV->getStartValue(), | ||
WideIV->getStepValue(), InsertPt); | ||
VPValue *Steps = createScalarIVSteps( | ||
Plan, ID.getKind(), ID.getInductionOpcode(), | ||
dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()), SE, | ||
WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(), | ||
InsertPt); | ||
|
||
// Update scalar users of IV to use Step instead. | ||
if (!HasOnlyVectorVFs) | ||
|
@@ -1025,7 +1065,7 @@ void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) { | |
removeRedundantInductionCasts(Plan); | ||
|
||
simplifyRecipes(Plan, SE.getContext()); | ||
optimizeInductions(Plan, SE); | ||
legalizeAndOptimizeInductions(Plan, SE); | ||
removeDeadRecipes(Plan); | ||
|
||
createAndOptimizeReplicateRegions(Plan); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Worth documenting somewhere what this VPInstruction/Opcode represents, including being scalar.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added, thanks!